home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Celestin Apprentice 5
/
Apprentice-Release5.iso
/
Source Code
/
Libraries
/
DCLAP 6d
/
dclap6d
/
SeqPups
/
appsrc
/
fastdnaml.src
/
fastDNAml.c
< prev
next >
Wrap
Text File
|
1996-07-05
|
147KB
|
5,203 lines
#define programName "fastDNAml"
#define programVersion "1.1.1a"
#define programVersionInt 10101
#define programDate "November 2, 1994"
#define programDateInt 19941102
/* fastDNAml
*
* Olsen, G. J., Matsuda, H., Hagstrom, R., and Overbeek, R. 1994.
* fastDNAml: A tool for construction of phylogenetic trees of DNA
* sequences using maximum likelihood. Comput. Appl. Biosci. 10: 41-48.
*
*
* Based in (large) part on the program dnaml by Joseph Felsenstein.
*
* Felsenstein, J. 1981. Evolutionary trees from DNA sequences:
* A maximum likelihood approach. J. Mol. Evol. 17: 368-376.
*
*
* Copyright notice from dnaml:
*
* version 3.3. (c) Copyright 1986, 1990 by the University of Washington
* and Joseph Felsenstein. Written by Joseph Felsenstein. Permission is
* granted to copy and use this program provided no fee is charged for it
* and provided that this copyright notice is not removed.
*/
/* Conversion to C and changes in sequential code by Gary Olsen, 1991-1994
*
* p4 version by Hideo Matsuda and Ross Overbeek, 1991-1993
*/
/*
* 1.0 March 14, 1992
* Initial "release" version
*
* 1.0.1 March 18, 1992
* Add ntaxa to tree comments
* Set minimum branch length on reading tree
* Add blanks around operators in treeString (for prolog parsing)
* Add program version to treeString comments
*
* 1.0.2 April 6, 1992
* Improved option line diagnostics
* Improved auxiliary line diagnostics
* Removed some trailing blanks from output
*
* 1.0.3 April 6, 1992
* Checkpoint trees that do not need any optimization
* Print restart tree likelihood before optimizing
* Fix treefile option so that it really toggles
*
* 1.0.4 July 13, 1992
* Add support for tree fact (instead of true Newick tree) in
* processTreeCom, treeReadLen, str_processTreeCom and
* str_treeReadLen
* Use bit operations in randum
* Correct error in bootstrap mask used with weighting mask
*
* 1.0.5 August 22, 1992
* Fix reading of underscore as first nonblank character in name
* Add strchr and strstr functions to source code
* Add output treefile name to message "Tree also written ..."
*
* 1.0.6 November 20, 1992
* Change (! nsites) test in setupTopol to (nsites == 0) for MIPS R4000
* Add vectorizing compiler directives for CRAY
* Include updates and corrections to parallel code from H. Matsuda
*
* 1.0.7 March 25, 1993
* Remove translation of underlines in taxon names
*
* 1.0.8 April 30, 1993
* Remove version number from fastDNAml.h file name
*
* 1.0.9 August 12, 1993
* Version was never released.
* Redefine treefile formats and default:
* 0 None
* 1 Newick
* 2 Prolog
* 3 PHYLIP (Default)
* Remove quote marks and comment from PHYLIP treefile format.
*
* 1.1.0 September 3-5, 1993
* Arrays of size maxpatterns moved from stack to heap (mallocs) in
* evaluateslope, makenewz, and cmpBestTrees.
* Correct [maxsites] to [maxpatterns] in temporary array definitions
* in Vectorize code of newview and evaluate. (These should also
* get converted to use malloc() at some point.)
* Change randum to use 12 bit segments, not 6. Change its seed
* parameter to long *.
* Remove the code that took the absolute value of random seeds.
* Correct integer divide artifact in setting default transition/
* transversion parameter values.
* When transition/transversion ratio is "reset", change to small
* value, not the program default.
* Report the "reset" transition/transversion ratio in the output.
* Move global data into analdef, rawDNA, and crunchedDNA structures.
* Change names of routines white and digit to whitechar and digitchar.
* Convert y[] to yType, which is normally char, but is int if the
* Vectorize flag is set.
* Split option line reading out of getoptions routine.
*
* 1.1.1 September 30, 1994
* Incorporate changes made in 1.0.A (Feb. 11, 1994):
* Remove processing of quotation marks within comments.
* Break label finding into copy to string and find tip.
* Generalize tree reading to read trees when names are and are not
* already known.
* Remove absolute value from randum seed reading.
* Include integer version number and program date.
* Remove maxsite, maxpatterns and maxsp limitations.
* Incorporate code for retaining multiple trees.
* Activate code for Hasegawa & Kishino test of tree differences.
* Make quick add the default, with Q turning it off.
* Make emperical frequencies the option with F turning it off.
* Allow a residue frequency option line anywhere in the options.
* Increase string length passed to treeString (should be length
* checked, but ...)
* Introduce (Sept.30) and fix (Oct. 26) bug in bootstrap sampling.
* Fix error when user frequencies are last line and start with F.
*/
#ifdef Master
# undef Master
# define Master 1
# define Slave 0
# define Sequential 0
#else
# ifdef Slave
# undef Slave
# define Master 0
# define Slave 1
# define Sequential 0
# else
# ifdef Sequential
# undef Sequential
# endif
# define Master 0
# define Slave 0
# define Sequential 1
# endif
#endif
#ifdef CRAY
# define Vectorize
#endif
#ifdef Vectorize
# define maxpatterns 10000 /* maximum number of different site patterns */
#endif
#include <stdio.h>
#include <math.h>
#include "fastDNAml.h" /* Requires version 1.1 */
#if Master || Slave
# include "p4.h"
# include "comm_link.h"
#endif
/* Global variables */
xarray *usedxtip, *freextip;
#if Sequential /* Use standard input */
# undef DNAML_STEP
# define DNAML_STEP 0
#ifdef NoSTDIN
FILE *INFILE;
#else
# define INFILE stdin
#endif
#endif
#if Master
# define MAX_SEND_AHEAD 400
char *best_tr_recv = NULL; /* these are used for flow control */
double best_lk_recv;
int send_ahead = 0; /* number of outstanding sends */
# ifdef DNAML_STEP
# define DNAML_STEP 1
# endif
# define INFILE Seqf
# define OUTFILE Outf
FILE *INFILE, *OUTFILE;
comm_block comm_slave;
#endif
#if Slave
# undef DNAML_STEP
# define DNAML_STEP 0
# define INFILE Seqf
# define OUTFILE Outf
FILE *INFILE, *OUTFILE;
comm_block comm_master;
#endif
#if Debug
FILE *debug;
#endif
#if DNAML_STEP
int begin_step_time, end_step_time;
# define REPORT_ADD_SPECS p4_send(DNAML_ADD_SPECS, DNAML_HOST_ID, NULL, 0)
# define REPORT_SEND_TREE p4_send(DNAML_SEND_TREE, DNAML_HOST_ID, NULL, 0)
# define REPORT_RECV_TREE p4_send(DNAML_RECV_TREE, DNAML_HOST_ID, NULL, 0)
# define REPORT_STEP_TIME \
{\
char send_buf[80]; \
end_step_time = p4_clock(); \
(void) sprintf(send_buf, "%d", end_step_time-begin_step_time); \
p4_send(DNAML_STEP_TIME, DNAML_HOST_ID, send_buf,strlen(send_buf)+1); \
begin_step_time = end_step_time; \
}
#else
# define REPORT_ADD_SPECS
# define REPORT_SEND_TREE
# define REPORT_RECV_TREE
# define REPORT_STEP_TIME
#endif
/*=======================================================================*/
/* PROGRAM */
/*=======================================================================*/
/* Best tree handling for dnaml */
/*=======================================================================*/
/* Tip value comparisons
*
* Use void pointers to hide type from other routines. Only tipValPtr and
* cmpTipVal need to be changed to alter the nature of the values compared
* (e.g., names instead of node numbers).
*
* cmpTipVal(tipValPtr(nodeptr p), tipValPtr(nodeptr q)) == -1, 0 or 1.
*
* This provides direct comparison of tip values (for example, for
* definition of tr->start).
*/
void *tipValPtr (p) nodeptr p; { return (void *) & p->number; }
int cmpTipVal (v1, v2)
void *v1, *v2;
{ /* cmpTipVal */
int i1, i2;
i1 = *((int *) v1);
i2 = *((int *) v2);
return (i1 < i2) ? -1 : ((i1 == i2) ? 0 : 1);
} /* cmpTipVal */
/* These are the only routines that need to UNDERSTAND topologies */
topol *setupTopol (maxtips, nsites)
int maxtips, nsites;
{ /* setupTopol */
topol *tpl;
if (! (tpl = (topol *) Malloc(sizeof(topol))) ||
! (tpl->links = (connptr) Malloc((2*maxtips-3) * sizeof(connect))) ||
(nsites && ! (tpl->log_f
= (double *) Malloc(nsites * sizeof(double))))) {
printf("ERROR: Unable to get topology memory");
tpl = (topol *) NULL;
}
else {
if (nsites == 0) tpl->log_f = (double *) NULL;
tpl->likelihood = unlikely;
tpl->start = (node *) NULL;
tpl->nextlink = 0;
tpl->ntips = 0;
tpl->nextnode = 0;
tpl->opt_level = 0; /* degree of branch swapping explored */
tpl->scrNum = 0; /* position in sorted list of scores */
tpl->tplNum = 0; /* position in sorted list of trees */
tpl->log_f_valid = 0; /* log_f value sites */
tpl->prelabeled = TRUE;
tpl->smoothed = FALSE; /* branch optimization converged? */
}
return tpl;
} /* setupTopol */
void freeTopol (tpl)
topol *tpl;
{ /* freeTopol */
Free(tpl->links);
if (tpl->log_f) Free(tpl->log_f);
Free(tpl);
} /* freeTopol */
int saveSubtree (p, tpl)
nodeptr p;
topol *tpl;
/* Save a subtree in a standard order so that earlier branches
* from a node contain lower value tips than do second branches from
* the node. This code works with arbitrary furcations in the tree.
*/
{ /* saveSubtree */
connptr r, r0;
nodeptr q, s;
int t, t0, t1;
r0 = tpl->links;
r = r0 + (tpl->nextlink)++;
r->p = p;
r->q = q = p->back;
r->z = p->z;
r->descend = 0; /* No children (yet) */
if (q->tip) {
r->valptr = tipValPtr(q); /* Assign value */
}
else { /* Internal node, look at children */
s = q->next; /* First child */
do {
t = saveSubtree(s, tpl); /* Generate child's subtree */
t0 = 0; /* Merge child into list */
t1 = r->descend;
while (t1 && (cmpTipVal(r0[t1].valptr, r0[t].valptr) < 0)) {
t0 = t1;
t1 = r0[t1].sibling;
}
if (t0) r0[t0].sibling = t; else r->descend = t;
r0[t].sibling = t1;
s = s->next; /* Next child */
} while (s != q);
r->valptr = r0[r->descend].valptr; /* Inherit first child's value */
} /* End of internal node processing */
return r - r0;
} /* saveSubtree */
nodeptr minSubtreeTip (p0)
nodeptr p0;
{ /* minTreeTip */
nodeptr minTip, p, testTip;
if (p0->tip) return p0;
p = p0->next;
minTip = minSubtreeTip(p->back);
while ((p = p->next) != p0) {
testTip = minSubtreeTip(p->back);
if (cmpTipVal(tipValPtr(testTip), tipValPtr(minTip)) < 0)
minTip = testTip;
}
return minTip;
} /* minTreeTip */
nodeptr minTreeTip (p)
nodeptr p;
{ /* minTreeTip */
nodeptr minp, minpb;
minp = minSubtreeTip(p);
minpb = minSubtreeTip(p->back);
return cmpTipVal(tipValPtr(minp), tipValPtr(minpb)) < 0 ? minp : minpb;
} /* minTreeTip */
void saveTree (tr, tpl)
tree *tr;
topol *tpl;
/* Save a tree topology in a standard order so that first branches
* from a node contain lower value tips than do second branches from
* the node. The root tip should have the lowest value of all.
*/
{ /* saveTree */
connptr r;
double *tr_log_f, *tpl_log_f;
int i;
tpl->nextlink = 0; /* Reset link pointer */
r = tpl->links + saveSubtree(minTreeTip(tr->start), tpl); /* Save tree */
r->sibling = 0;
tpl->likelihood = tr->likelihood;
tpl->start = tr->start;
tpl->ntips = tr->ntips;
tpl->nextnode = tr->nextnode;
tpl->opt_level = tr->opt_level;
tpl->prelabeled = tr->prelabeled;
tpl->smoothed = tr->smoothed;
if (tpl_log_f = tpl->log_f) {
tr_log_f = tr->log_f;
i = tpl->log_f_valid = tr->log_f_valid;
while (--i >= 0) *tpl_log_f++ = *tr_log_f++;
}
else {
tpl->log_f_valid = 0;
}
} /* saveTree */
void copyTopol (tpl1, tpl2)
topol *tpl1, *tpl2;
{ /* copyTopol */
connptr r1, r2, r10, r20;
double *tpl1_log_f, *tpl2_log_f;
int i;
r10 = tpl1->links;
r20 = tpl2->links;
tpl2->nextlink = tpl1->nextlink;
r1 = r10;
r2 = r20;
i = 2 * tpl1->ntips - 3;
while (--i >= 0) {
r2->z = r1->z;
r2->p = r1->p;
r2->q = r1->q;
r2->valptr = r1->valptr;
r2->descend = r1->descend;
r2->sibling = r1->sibling;
r1++;
r2++;
}
if (tpl1->log_f_valid && tpl2->log_f) {
tpl1_log_f = tpl1->log_f;
tpl2_log_f = tpl2->log_f;
tpl2->log_f_valid = i = tpl1->log_f_valid;
while (--i >= 0) *tpl2_log_f++ = *tpl1_log_f++;
}
else {
tpl2->log_f_valid = 0;
}
tpl2->likelihood = tpl1->likelihood;
tpl2->start = tpl1->start;
tpl2->ntips = tpl1->ntips;
tpl2->nextnode = tpl1->nextnode;
tpl2->opt_level = tpl1->opt_level;
tpl2->prelabeled = tpl1->prelabeled;
tpl2->scrNum = tpl1->scrNum;
tpl2->tplNum = tpl1->tplNum;
tpl2->smoothed = tpl1->smoothed;
} /* copyTopol */
boolean restoreTree (tpl, tr)
topol *tpl;
tree *tr;
{ /* restoreTree */
void hookup();
boolean initrav();
connptr r;
nodeptr p, p0;
double *tr_log_f, *tpl_log_f;
int i;
/* Clear existing connections */
for (i = 1; i <= 2*(tr->mxtips) - 2; i++) { /* Uses p = p->next at tip */
p0 = p = tr->nodep[i];
do {
p->back = (nodeptr) NULL;
p = p->next;
} while (p != p0);
}
/* Copy connections from topology */
for (r = tpl->links, i = 0; i < tpl->nextlink; r++, i++) {
hookup(r->p, r->q, r->z);
}
tr->likelihood = tpl->likelihood;
tr->start = tpl->start;
tr->ntips = tpl->ntips;
tr->nextnode = tpl->nextnode;
tr->opt_level = tpl->opt_level;
tr->prelabeled = tpl->prelabeled;
tr->smoothed = tpl->smoothed;
if (tpl_log_f = tpl->log_f) {
tr_log_f = tr->log_f;
i = tr->log_f_valid = tpl->log_f_valid;
while (--i >= 0) *tr_log_f++ = *tpl_log_f++;
}
else {
tr->log_f_valid = 0;
}
return (initrav(tr, tr->start) && initrav(tr, tr->start->back));
} /* restoreTree */
int initBestTree (bt, newkeep, numsp, sites)
bestlist *bt;
int newkeep, numsp, sites;
{ /* initBestTree */
int i, nlogf;
bt->nkeep = 0;
if (bt->ninit <= 0) {
if (! (bt->start = setupTopol(numsp, sites))) return 0;
bt->ninit = -1;
bt->nvalid = 0;
bt->numtrees = 0;
bt->best = unlikely;
bt->improved = FALSE;
bt->byScore = (topol **) Malloc((newkeep+1) * sizeof(topol *));
bt->byTopol = (topol **) Malloc((newkeep+1) * sizeof(topol *));
if (! bt->byScore || ! bt->byTopol) {
fprintf(stderr, "initBestTree: Malloc failure\n");
return 0;
}
}
else if (ABS(newkeep) > bt->ninit) {
if (newkeep < 0) newkeep = -(bt->ninit);
else newkeep = bt->ninit;
}
if (newkeep < 1) { /* Use negative newkeep to clear list */
newkeep = -newkeep;
if (newkeep < 1) newkeep = 1;
bt->nvalid = 0;
bt->best = unlikely;
}
if (bt->nvalid >= newkeep) {
bt->nvalid = newkeep;
bt->worst = bt->byScore[newkeep]->likelihood;
}
else {
bt->worst = unlikely;
}
for (i = bt->ninit + 1; i <= newkeep; i++) {
nlogf = (i <= maxlogf) ? sites : 0;
if (! (bt->byScore[i] = setupTopol(numsp, nlogf))) break;
bt->byTopol[i] = bt->byScore[i];
bt->ninit = i;
}
return (bt->nkeep = MIN(newkeep, bt->ninit));
} /* initBestTree */
int resetBestTree (bt)
bestlist *bt;
{ /* resetBestTree */
bt->best = unlikely;
bt->worst = unlikely;
bt->nvalid = 0;
bt->improved = FALSE;
} /* resetBestTree */
boolean freeBestTree(bt)
bestlist *bt;
{ /* freeBestTree */
while (bt->ninit >= 0) freeTopol(bt->byScore[(bt->ninit)--]);
freeTopol(bt->start);
return TRUE;
} /* freeBestTree */
/* Compare two trees, assuming that each is in standard order. Return
* -1 if first preceeds second, 0 if they are identical, or +1 if first
* follows second in standard order. Lower number tips preceed higher
* number tips. A tip preceeds a corresponding internal node. Internal
* nodes are ranked by their lowest number tip.
*/
int cmpSubtopol (p10, p1, p20, p2)
connptr p10, p1, p20, p2;
{ /* cmpSubtopol */
connptr p1d, p2d;
int cmp;
if (! p1->descend && ! p2->descend) /* Two tips */
return cmpTipVal(p1->valptr, p2->valptr);
if (! p1->descend) return -1; /* p1 = tip, p2 = node */
if (! p2->descend) return 1; /* p2 = tip, p1 = node */
p1d = p10 + p1->descend;
p2d = p20 + p2->descend;
while (1) { /* Two nodes */
if (cmp = cmpSubtopol(p10, p1d, p20, p2d)) return cmp; /* Subtrees */
if (! p1d->sibling && ! p2d->sibling) return 0; /* Lists done */
if (! p1d->sibling) return -1; /* One done, other not */
if (! p2d->sibling) return 1; /* One done, other not */
p1d = p10 + p1d->sibling; /* Neither done */
p2d = p20 + p2d->sibling;
}
} /* cmpSubtopol */
int cmpTopol (tpl1, tpl2)
void *tpl1, *tpl2;
{ /* cmpTopol */
connptr r1, r2;
int cmp;
r1 = ((topol *) tpl1)->links;
r2 = ((topol *) tpl2)->links;
cmp = cmpTipVal(tipValPtr(r1->p), tipValPtr(r2->p));
if (cmp) return cmp;
return cmpSubtopol(r1, r1, r2, r2);
} /* cmpTopol */
int cmpTplScore (tpl1, tpl2)
void *tpl1, *tpl2;
{ /* cmpTplScore */
double l1, l2;
l1 = ((topol *) tpl1)->likelihood;
l2 = ((topol *) tpl2)->likelihood;
return (l1 > l2) ? -1 : ((l1 == l2) ? 0 : 1);
} /* cmpTplScore */
/* Find an item in a sorted list of n items. If the item is in the list,
* return its index. If it is not in the list, return the negative of the
* position into which it should be inserted.
*/
int findInList (item, list, n, cmpFunc)
void *item;
void *list[];
int n;
int (* cmpFunc)();
{ /* findInList */
int mid, hi, lo, cmp;
if (n < 1) return -1; /* No match; first index */
lo = 1;
mid = 0;
hi = n;
while (lo < hi) {
mid = (lo + hi) >> 1;
cmp = (* cmpFunc)(item, list[mid-1]);
if (cmp) {
if (cmp < 0) hi = mid;
else lo = mid + 1;
}
else return mid; /* Exact match */
}
if (lo != mid) {
cmp = (* cmpFunc)(item, list[lo-1]);
if (cmp == 0) return lo;
}
if (cmp > 0) lo++; /* Result of step = 0 test */
return -lo;
} /* findInList */
int findTreeInList (bt, tr)
bestlist *bt;
tree *tr;
{ /* findTreeInList */
topol *tpl;
tpl = bt->byScore[0];
saveTree(tr, tpl);
return findInList((void *) tpl, (void **) (& (bt->byTopol[1])),
bt->nvalid, cmpTopol);
} /* findTreeInList */
int saveBestTree (bt, tr)
bestlist *bt;
tree *tr;
{ /* saveBestTree */
double *tr_log_f, *tpl_log_f;
topol *tpl, *reuse;
int tplNum, scrNum, reuseScrNum, reuseTplNum, i, oldValid, newValid;
tplNum = findTreeInList(bt, tr);
tpl = bt->byScore[0];
oldValid = newValid = bt->nvalid;
if (tplNum > 0) { /* Topology is in list */
reuse = bt->byTopol[tplNum]; /* Matching topol */
reuseScrNum = reuse->scrNum;
reuseTplNum = reuse->tplNum;
}
/* Good enough to keep? */
else if (tr->likelihood < bt->worst) return 0;
else { /* Topology is not in list */
tplNum = -tplNum; /* Add to list (not replace) */
if (newValid < bt->nkeep) bt->nvalid = ++newValid;
reuseScrNum = newValid; /* Take worst tree */
reuse = bt->byScore[reuseScrNum];
reuseTplNum = (newValid > oldValid) ? newValid : reuse->tplNum;
if (tr->likelihood > bt->start->likelihood) bt->improved = TRUE;
}
scrNum = findInList((void *) tpl, (void **) (& (bt->byScore[1])),
oldValid, cmpTplScore);
scrNum = ABS(scrNum);
if (scrNum < reuseScrNum)
for (i = reuseScrNum; i > scrNum; i--)
(bt->byScore[i] = bt->byScore[i-1])->scrNum = i;
else if (scrNum > reuseScrNum) {
scrNum--;
for (i = reuseScrNum; i < scrNum; i++)
(bt->byScore[i] = bt->byScore[i+1])->scrNum = i;
}
if (tplNum < reuseTplNum)
for (i = reuseTplNum; i > tplNum; i--)
(bt->byTopol[i] = bt->byTopol[i-1])->tplNum = i;
else if (tplNum > reuseTplNum) {
tplNum--;
for (i = reuseTplNum; i < tplNum; i++)
(bt->byTopol[i] = bt->byTopol[i+1])->tplNum = i;
}
if (tpl_log_f = tpl->log_f) {
tr_log_f = tr->log_f;
i = tpl->log_f_valid = tr->log_f_valid;
while (--i >= 0) *tpl_log_f++ = *tr_log_f++;
}
else {
tpl->log_f_valid = 0;
}
tpl->scrNum = scrNum;
tpl->tplNum = tplNum;
bt->byTopol[tplNum] = bt->byScore[scrNum] = tpl;
bt->byScore[0] = reuse;
if (scrNum == 1) bt->best = tr->likelihood;
if (newValid == bt->nkeep) bt->worst = bt->byScore[newValid]->likelihood;
return scrNum;
} /* saveBestTree */
int startOpt (bt, tr)
bestlist *bt;
tree *tr;
{ /* startOpt */
int scrNum;
scrNum = saveBestTree(bt, tr);
copyTopol(bt->byScore[scrNum], bt->start);
bt->improved = FALSE;
return scrNum;
} /* startOpt */
int setOptLevel (bt, opt_level)
bestlist *bt;
int opt_level;
{ /* setOptLevel */
int tplNum, scrNum;
tplNum = findInList((void *) bt->start, (void **) (&(bt->byTopol[1])),
bt->nvalid, cmpTopol);
if (tplNum > 0) {
bt->byTopol[tplNum]->opt_level = opt_level;
scrNum = bt->byTopol[tplNum]->scrNum;
}
else {
scrNum = 0;
}
return scrNum;
} /* setOptLevel */
int recallBestTree (bt, rank, tr)
bestlist *bt;
int rank;
tree *tr;
{ /* recallBestTree */
if (rank < 1) rank = 1;
if (rank > bt->nvalid) rank = bt->nvalid;
if (rank > 0) if (! restoreTree(bt->byScore[rank], tr)) return FALSE;
return rank;
} /* recallBestTree */
/*=======================================================================*/
/* End of best tree routines */
/*=======================================================================*/
#if 0
void hang(msg) char *msg; {printf("Hanging around: %s\n", msg); while(1);}
#endif
boolean getnums (rdta)
rawDNA *rdta;
/* input number of species, number of sites */
{ /* getnums */
printf("\n%s, version %s, %s\n\n",
programName,
programVersion,
programDate);
printf("Based on Joseph Felsenstein's\n\n");
printf(" Nucleic acid sequence Maximum Likelihood method, ");
printf("version 3.3\n\n");
if (fscanf(INFILE, "%d %d", & rdta->numsp, & rdta->sites) != 2) {
printf("ERROR: Problem reading number of species and sites\n");
return FALSE;
}
printf("%d Species, %d Sites\n\n", rdta->numsp, rdta->sites);
if (rdta->numsp < 4) {
printf("TOO FEW SPECIES\n");
return FALSE;
}
if (rdta->sites < 1) {
printf("TOO FEW SITES\n");
return FALSE;
}
return TRUE;
} /* getnums */
boolean digitchar (ch) int ch; {return (ch >= '0' && ch <= '9'); }
boolean whitechar (ch) int ch;
{ return (ch == ' ' || ch == '\n' || ch == '\t');
}
void uppercase (chptr)
int *chptr;
/* convert character to upper case -- either ASCII or EBCDIC */
{ /* uppercase */
int ch;
ch = *chptr;
if ((ch >= 'a' && ch <= 'i') || (ch >= 'j' && ch <= 'r')
|| (ch >= 's' && ch <= 'z'))
*chptr = ch + 'A' - 'a';
} /* uppercase */
int base36 (ch)
int ch;
{ /* base36 */
if (ch >= '0' && ch <= '9') return (ch - '0');
else if (ch >= 'A' && ch <= 'I') return (ch - 'A' + 10);
else if (ch >= 'J' && ch <= 'R') return (ch - 'J' + 19);
else if (ch >= 'S' && ch <= 'Z') return (ch - 'S' + 28);
else if (ch >= 'a' && ch <= 'i') return (ch - 'a' + 10);
else if (ch >= 'j' && ch <= 'r') return (ch - 'j' + 19);
else if (ch >= 's' && ch <= 'z') return (ch - 's' + 28);
else return -1;
} /* base36 */
int itobase36 (i)
int i;
{ /* itobase36 */
if (i < 0) return '?';
else if (i < 10) return (i + '0');
else if (i < 19) return (i - 10 + 'A');
else if (i < 28) return (i - 19 + 'J');
else if (i < 36) return (i - 28 + 'S');
else return '?';
} /* itobase36 */
int findch (c)
int c;
{ /* findch */
int ch;
while ((ch = getc(INFILE)) != EOF && ch != c) ;
return ch;
} /* findch */
#if Master || Slave
int str_findch (treestrp, c)
char **treestrp;
int c;
{ /* str_findch */
int ch;
while ((ch = *(*treestrp)++) != NULL && ch != c) ;
return ch;
} /* str_findch */
#endif
boolean inputboot(adef)
analdef *adef;
/* read the bootstrap auxilliary info */
{ /* inputboot */
if (! adef->boot) {
printf("ERROR: Unexpected Bootstrap auxiliary data line\n");
return FALSE;
}
else if (fscanf(INFILE, "%ld", & adef->boot) != 1 ||
findch('\n') == EOF) {
printf("ERROR: Problem reading boostrap random seed value\n");
return FALSE;
}
return TRUE;
} /* inputboot */
boolean inputcategories (rdta)
rawDNA *rdta;
/* read the category rates and the categories for each site */
{ /* inputcategories */
int i, j, ch, ci;
if (rdta->categs >= 0) {
printf("ERROR: Unexpected Categories auxiliary data line\n");
return FALSE;
}
if (fscanf(INFILE, "%d", & rdta->categs) != 1) {
printf("ERROR: Problem reading number of rate categories\n");
return FALSE;
}
if (rdta->categs < 1 || rdta->categs > maxcategories) {
printf("ERROR: Bad number of categories: %d\n", rdta->categs);
printf("Must be in range 1 - %d\n", maxcategories);
return FALSE;
}
for (j = 1; j <= rdta->categs
&& fscanf(INFILE, "%lf", &(rdta->catrat[j])) == 1; j++) ;
if ((j <= rdta->categs) || (findch('\n') == EOF)) {
printf("ERROR: Problem reading rate values\n");
return FALSE;
}
for (i = 1; i <= nmlngth; i++) (void) getc(INFILE);
i = 1;
while (i <= rdta->sites) {
ch = getc(INFILE);
ci = base36(ch);
if (ci >= 0 && ci <= rdta->categs)
rdta->sitecat[i++] = ci;
else if (! whitechar(ch)) {
printf("ERROR: Bad category character (%c) at site %d\n", ch, i);
return FALSE;
}
}
if (findch('\n') == EOF) { /* skip to end of line */
printf("ERROR: Missing newline at end of category data\n");
return FALSE;
}
return TRUE;
} /* inputcategories */
boolean inputextra (adef)
analdef *adef;
{ /* inputextra */
if (fscanf(INFILE,"%d", & adef->extra) != 1 ||
findch('\n') == EOF) {
printf("ERROR: Problem reading extra info value\n");
return FALSE;
}
return TRUE;
} /* inputextra */
boolean inputfreqs (rdta)
rawDNA *rdta;
{ /* inputfreqs */
if (fscanf(INFILE, "%lf%lf%lf%lf",
& rdta->freqa, & rdta->freqc,
& rdta->freqg, & rdta->freqt) != 4 ||
findch('\n') == EOF) {
printf("ERROR: Problem reading user base frequencies data\n");
return FALSE;
}
rdta->freqread = TRUE;
return TRUE;
} /* inputfreqs */
boolean inputglobal (tr)
tree *tr;
/* input the global option information */
{ /* inputglobal */
int ch;
if (tr->global != -2) {
printf("ERROR: Unexpected Global auxiliary data line\n");
return FALSE;
}
if (fscanf(INFILE, "%d", &(tr->global)) != 1) {
printf("ERROR: Problem reading rearrangement region size\n");
return FALSE;
}
if (tr->global < 0) {
printf("WARNING: Global region size too small;\n");
printf(" value reset to local\n\n");
tr->global = 1;
}
else if (tr->global == 0) tr->partswap = 0;
else if (tr->global > tr->mxtips - 3) {
tr->global = tr->mxtips - 3;
}
while ((ch = getc(INFILE)) != '\n') { /* Scan for second value */
if (! whitechar(ch)) {
if (ch != EOF) (void) ungetc(ch, INFILE);
if (ch == EOF || fscanf(INFILE, "%d", &(tr->partswap)) != 1
|| findch('\n') == EOF) {
printf("ERROR: Problem reading insert swap region size\n");
return FALSE;
}
else if (tr->partswap < 0) tr->partswap = 1;
else if (tr->partswap > tr->mxtips - 3) {
tr->partswap = tr->mxtips - 3;
}
if (tr->partswap > tr->global) tr->global = tr->partswap;
break; /* Break while loop */
}
}
return TRUE;
} /* inputglobal */
boolean inputjumble (adef)
analdef *adef;
{ /* inputjumble */
if (! adef->jumble) {
printf("ERROR: Unexpected Jumble auxiliary data line\n");
return FALSE;
}
else if (fscanf(INFILE, "%ld", & adef->jumble) != 1 ||
findch('\n') == EOF) {
printf("ERROR: Problem reading jumble random seed value\n");
return FALSE;
}
else if (adef->jumble == 0) {
printf("WARNING: Jumble random number seed is zero\n\n");
}
return TRUE;
} /* inputjumble */
boolean inputkeep (adef)
analdef *adef;
{ /* inputkeep */
if (fscanf(INFILE, "%d", & adef->nkeep) != 1 ||
findch('\n') == EOF || adef->nkeep < 1) {
printf("ERROR: Problem reading number of kept trees\n");
return FALSE;
}
return TRUE;
} /* inputkeep */
boolean inputoutgroup (adef, tr)
analdef *adef;
tree *tr;
{ /* inputoutgroup */
if (! adef->root || tr->outgr > 0) {
printf("ERROR: Unexpected Outgroup auxiliary data line\n");
return FALSE;
}
else if (fscanf(INFILE, "%d", &(tr->outgr)) != 1 ||
findch('\n') == EOF) {
printf("ERROR: Problem reading outgroup number\n");
return FALSE;
}
else if ((tr->outgr < 1) || (tr->outgr > tr->mxtips)) {
printf("ERROR: Bad outgroup: '%d'\n", tr->outgr);
return FALSE;
}
return TRUE;
} /* inputoutgroup */
boolean inputratio (rdta)
rawDNA *rdta;
{ /* inputratio */
if (rdta->ttratio >= 0.0) {
printf("ERROR: Unexpected Transition/transversion auxiliary data\n");
return FALSE;
}
else if (fscanf(INFILE,"%lf", & rdta->ttratio)!=1 ||
findch('\n') == EOF) {
printf("ERROR: Problem reading transition/transversion ratio\n");
return FALSE;
}
return TRUE;
} /* inputratio */
/* Y 0 is treeNone (no tree)
Y 1 is treeNewick
Y 2 is treeProlog
Y 3 is treePHYLIP
*/
boolean inputtreeopt (adef)
analdef *adef;
{ /* inputtreeopt */
if (! adef->trout) {
printf("ERROR: Unexpected Treefile auxiliary data\n");
return FALSE;
}
else if (fscanf(INFILE,"%d", & adef->trout) != 1 ||
findch('\n') == EOF) {
printf("ERROR: Problem reading output tree-type number\n");
return FALSE;
}
else if ((adef->trout < 0) || (adef->trout > treeMaxType)) {
printf("ERROR: Bad output tree-type number: '%d'\n", adef->trout);
return FALSE;
}
return TRUE;
} /* inputtreeopt */
boolean inputweights (adef, rdta, cdta)
analdef *adef;
rawDNA *rdta;
crunchedDNA *cdta;
/* input the character weights 0, 1, 2 ... 9, A, B, ... Y, Z */
{ /* inputweights */
int i, ch, wi;
if (! adef->userwgt || cdta->wgtsum > 0) {
printf("ERROR: Unexpected Weights auxiliary data\n");
return FALSE;
}
for (i = 2; i <= nmlngth; i++) (void) getc(INFILE);
cdta->wgtsum = 0;
i = 1;
while (i <= rdta->sites) {
ch = getc(INFILE);
wi = base36(ch);
if (wi >= 0)
cdta->wgtsum += rdta->wgt[i++] = wi;
else if (! whitechar(ch)) {
printf("ERROR: Bad weight character: '%c'", ch);
printf(" Weights in dnaml must be a digit or a letter.\n");
return FALSE;
}
}
if (findch('\n') == EOF) { /* skip to end of line */
printf("ERROR: Missing newline at end of weight data\n");
return FALSE;
}
return TRUE;
} /* inputweights */
boolean getoptions (adef, rdta, cdta, tr)
analdef *adef;
rawDNA *rdta;
crunchedDNA *cdta;
tree *tr;
{ /* getoptions */
int ch, i, extranum;
adef->boot = 0; /* Don't bootstrap column weights */
adef->empf = TRUE; /* Use empirical base frequencies */
adef->extra = 0; /* No extra runtime info unless requested */
adef->interleaved = TRUE; /* By default, data format is interleaved */
adef->jumble = FALSE; /* Use random addition sequence */
adef->nkeep = 0; /* Keep only the one best tree */
adef->prdata = FALSE; /* Don't echo data to output stream */
adef->qadd = TRUE; /* Smooth branches globally in add */
adef->restart = FALSE; /* Restart from user tree */
adef->root = FALSE; /* User-defined outgroup rooting */
adef->trout = treeDefType; /* Output tree file */
adef->trprint = TRUE; /* Print tree to output stream */
rdta->categs = 0; /* No rate categories */
rdta->catrat[1] = 1.0; /* Rate values */
rdta->freqread = FALSE; /* User-defined frequencies not read yet */
rdta->ttratio = 2.0; /* Transition/transversion rate ratio */
tr->global = -1; /* Default search locale for optimum */
tr->mxtips = rdta->numsp;
tr->outgr = 1; /* Outgroup number */
tr->partswap = 1; /* Default to swap locally after insert */
tr->userlen = FALSE; /* User-supplied branch lengths */
adef->usertree = FALSE; /* User-defined tree topologies */
adef->userwgt = FALSE; /* User-defined position weights */
extranum = 0;
while ((ch = getc(INFILE)) != '\n' && ch != EOF) {
uppercase(& ch);
switch (ch) {
case '1' : adef->prdata = ! adef->prdata; break;
case '3' : adef->trprint = ! adef->trprint; break;
case '4' : adef->trout = treeDefType - adef->trout; break;
case 'B' : adef->boot = 1; extranum++; break;
case 'C' : rdta->categs = -1; extranum++; break;
case 'E' : adef->extra = -1; break;
case 'F' : adef->empf = ! adef->empf; break;
case 'G' : tr->global = -2; break;
case 'I' : adef->interleaved = ! adef->interleaved; break;
case 'J' : adef->jumble = 1; extranum++; break;
case 'K' : extranum++; break;
case 'L' : tr->userlen = TRUE; break;
case 'O' : adef->root = TRUE; tr->outgr = 0; extranum++; break;
case 'Q' : adef->qadd = FALSE; break;
case 'R' : adef->restart = TRUE; break;
case 'T' : rdta->ttratio = -1.0; extranum++; break;
case 'U' : adef->usertree = TRUE; break;
case 'W' : adef->userwgt = TRUE; cdta->wgtsum = 0; extranum++; break;
case 'Y' : adef->trout = treeDefType - adef->trout; break;
case ' ' : break;
case '\t': break;
default :
printf("ERROR: Bad option character: '%c'\n", ch);
return FALSE;
}
}
if (ch == EOF) {
printf("ERROR: End-of-file in options list\n");
return FALSE;
}
if (adef->usertree && adef->restart) {
printf("ERROR: The restart and user-tree options conflict:\n");
printf(" Restart adds rest of taxa to a starting tree;\n");
printf(" User-tree does not add any taxa.\n\n");
return FALSE;
}
if (adef->usertree && adef->jumble) {
printf("WARNING: The jumble and user-tree options conflict:\n");
printf(" Jumble adds taxa to a tree in random order;\n");
printf(" User-tree does not use taxa addition.\n");
printf(" Jumble option cancelled for this run.\n\n");
adef->jumble = FALSE;
}
if (tr->userlen && tr->global != -1) {
printf("ERROR: The global and user-lengths options conflict:\n");
printf(" Global optimizes a starting tree;\n");
printf(" User-lengths constrain the starting tree.\n\n");
return FALSE;
}
if (tr->userlen && ! adef->usertree) {
printf("WARNING: User lengths required user tree option.\n");
printf(" User-tree option set for this run.\n\n");
adef->usertree = TRUE;
}
rdta->wgt = (int *) Malloc((rdta->sites + 1) * sizeof(int));
rdta->wgt2 = (int *) Malloc((rdta->sites + 1) * sizeof(int));
rdta->sitecat = (int *) Malloc((rdta->sites + 1) * sizeof(int));
cdta->alias = (int *) Malloc((rdta->sites + 1) * sizeof(int));
cdta->aliaswgt = (int *) Malloc((rdta->sites + 1) * sizeof(int));
cdta->patcat = (int *) Malloc((rdta->sites + 1) * sizeof(int));
cdta->patrat = (double *) Malloc((rdta->sites + 1) * sizeof(double));
cdta->wr = (double *) Malloc((rdta->sites + 1) * sizeof(double));
cdta->wr2 = (double *) Malloc((rdta->sites + 1) * sizeof(double));
if ( ! rdta->wgt || ! rdta->wgt2 || ! rdta->sitecat || ! cdta->alias
|| ! cdta->aliaswgt || ! cdta->patcat || ! cdta->patrat
|| ! cdta->wr || ! cdta->wr2) {
fprintf(stderr, "getoptions: Malloc failure\n");
return 0;
}
/* process lines with auxiliary data */
while (extranum--) {
ch = getc(INFILE);
uppercase(& ch);
switch (ch) {
case 'B': if (! inputboot(adef)) return FALSE; break;
case 'C': if (! inputcategories(rdta)) return FALSE; break;
case 'E': if (! inputextra(adef)) return FALSE; extranum++; break;
case 'F': if (! inputfreqs(rdta)) return FALSE; break;
case 'G': if (! inputglobal(tr)) return FALSE; extranum++; break;
case 'J': if (! inputjumble(adef)) return FALSE; break;
case 'K': if (! inputkeep(adef)) return FALSE; break;
case 'O': if (! inputoutgroup(adef, tr)) return FALSE; break;
case 'T': if (! inputratio(rdta)) return FALSE; break;
case 'W': if (! inputweights(adef, rdta, cdta)) return FALSE; break;
case 'Y': if (! inputtreeopt(adef)) return FALSE; extranum++; break;
default:
printf("ERROR: Auxiliary options line starts with '%c'\n", ch);
return FALSE;
}
}
if (! adef->userwgt) {
for (i = 1; i <= rdta->sites; i++) rdta->wgt[i] = 1;
cdta->wgtsum = rdta->sites;
}
if (adef->userwgt && cdta->wgtsum < 1) {
printf("ERROR: Missing or bad user-supplied weight data.\n");
return FALSE;
}
if (adef->boot) {
printf("Bootstrap random number seed = %ld\n\n", adef->boot);
}
if (adef->jumble) {
printf("Jumble random number seed = %ld\n\n", adef->jumble);
}
if (adef->qadd) {
printf("Quick add (only local branches initially optimized) in effect\n\n");
}
if (rdta->categs > 0) {
printf("Site category Rate of change\n\n");
for (i = 1; i <= rdta->categs; i++)
printf(" %c%13.3f\n", itobase36(i), rdta->catrat[i]);
putchar('\n');
for (i = 1; i <= rdta->sites; i++) {
if ((rdta->wgt[i] > 0) && (rdta->sitecat[i] < 1)) {
printf("ERROR: Bad category (%c) at site %d\n",
itobase36(rdta->sitecat[i]), i);
return FALSE;
}
}
}
else if (rdta->categs < 0) {
printf("ERROR: Category auxiliary data missing from input\n");
return FALSE;
}
else { /* rdta->categs == 0 */
for (i = 1; i <= rdta->sites; i++) rdta->sitecat[i] = 1;
rdta->categs = 1;
}
if (tr->outgr < 1) {
printf("ERROR: Outgroup auxiliary data missing from input\n");
return FALSE;
}
if (rdta->ttratio < 0.0) {
printf("ERROR: Transition/transversion auxiliary data missing from input\n");
return FALSE;
}
if (tr->global < 0) {
if (tr->global == -2) tr->global = tr->mxtips - 3; /* Default global */
else tr->global = adef->usertree ? 0 : 1;/* No global */
}
if (adef->restart) {
printf("Restart option in effect. ");
printf("Sequence addition will start from appended tree.\n\n");
}
if (adef->usertree && ! tr->global) {
printf("User-supplied tree topology%swill be used.\n\n",
tr->userlen ? " and branch lengths " : " ");
}
else {
if (! adef->usertree) {
printf("Rearrangements of partial trees may cross %d %s.\n",
tr->partswap, tr->partswap == 1 ? "branch" : "branches");
}
printf("Rearrangements of full tree may cross %d %s.\n\n",
tr->global, tr->global == 1 ? "branch" : "branches");
}
if (! adef->usertree && adef->nkeep == 0) adef->nkeep = 1;
return TRUE;
} /* getoptions */
boolean getbasefreqs (rdta)
rawDNA *rdta;
{ /* getbasefreqs */
int ch;
if (rdta->freqread) return TRUE;
ch = getc(INFILE);
if (! ((ch == 'F') || (ch == 'f'))) (void) ungetc(ch, INFILE);
if (fscanf(INFILE, "%lf%lf%lf%lf",
& rdta->freqa, & rdta->freqc,
& rdta->freqg, & rdta->freqt) != 4 ||
findch('\n') == EOF) {
printf("ERROR: Problem reading user base frequencies\n");
return FALSE;
}
return TRUE;
} /* getbasefreqs */
boolean getyspace (rdta)
rawDNA *rdta;
{ /* getyspace */
long size;
int i;
yType *y0;
if (! (rdta->y = (yType **) Malloc((rdta->numsp + 1) * sizeof(yType *)))) {
printf("ERROR: Unable to obtain space for data array pointers\n");
return FALSE;
}
size = 4 * (rdta->sites / 4 + 1);
if (! (y0 = (yType *) Malloc((rdta->numsp + 1) * size * sizeof(yType)))) {
printf("ERROR: Unable to obtain space for data array\n");
return FALSE;
}
for (i = 0; i <= rdta->numsp; i++) {
rdta->y[i] = y0;
y0 += size;
}
return TRUE;
} /* getyspace */
boolean setupTree (tr, nsites)
tree *tr;
int nsites;
{ /* setupTree */
nodeptr p0, p, q;
int i, j, tips, inter;
tips = tr->mxtips;
inter = tr->mxtips - 1;
if (!(p0 = (nodeptr) Malloc((tips + 3*inter) * sizeof(node)))) {
printf("ERROR: Unable to obtain sufficient tree memory\n");
return FALSE;
}
if (!(tr->nodep = (nodeptr *) Malloc((2*tr->mxtips) * sizeof(nodeptr)))) {
printf("ERROR: Unable to obtain sufficient tree memory, too\n");
return FALSE;
}
tr->nodep[0] = (node *) NULL; /* Use as 1-based array */
for (i = 1; i <= tips; i++) { /* Set-up tips */
p = p0++;
p->x = (xarray *) NULL;
p->tip = (yType *) NULL;
p->number = i;
p->next = p;
p->back = (node *) NULL;
tr->nodep[i] = p;
}
for (i = tips + 1; i <= tips + inter; i++) { /* Internal nodes */
q = (node *) NULL;
for (j = 1; j <= 3; j++) {
p = p0++;
p->x = (xarray *) NULL;
p->tip = (yType *) NULL;
p->number = i;
p->next = q;
p->back = (node *) NULL;
q = p;
}
p->next->next->next = p;
tr->nodep[i] = p;
}
tr->likelihood = unlikely;
tr->start = (node *) NULL;
tr->outgrnode = tr->nodep[tr->outgr];
tr->ntips = 0;
tr->nextnode = 0;
tr->opt_level = 0;
tr->prelabeled = TRUE;
tr->smoothed = FALSE;
tr->log_f_valid = 0;
tr->log_f = (double *) Malloc(nsites * sizeof(double));
if (! tr->log_f) {
printf("ERROR: Unable to obtain sufficient tree memory, trey\n");
return FALSE;
}
return TRUE;
} /* setupTree */
void freeTreeNode (p) /* Free tree node (sector) associated data */
nodeptr p;
{ /* freeTreeNode */
if (p) {
if (p->x) {
if (p->x->a) Free(p->x->a);
Free(p->x);
}
}
} /* freeTreeNode */
void freeTree (tr)
tree *tr;
{ /* freeTree */
nodeptr p, q;
int i, tips, inter;
tips = tr->mxtips;
inter = tr->mxtips - 1;
for (i = 1; i <= tips; i++) freeTreeNode(tr->nodep[i]);
for (i = tips + 1; i <= tips + inter; i++) {
if (p = tr->nodep[i]) {
if (q = p->next) {
freeTreeNode(q->next);
freeTreeNode(q);
}
freeTreeNode(p);
}
}
Free(tr->nodep[1]); /* Free the actual nodes */
} /* freeTree */
boolean getdata (adef, rdta, tr)
analdef *adef;
rawDNA *rdta;
tree *tr;
/* read sequences */
{ /* getdata */
int i, j, k, l, basesread, basesnew, ch;
int meaning[256]; /* meaning of input characters */
char *nameptr;
boolean allread, firstpass;
for (i = 0; i <= 255; i++) meaning[i] = 0;
meaning['A'] = 1;
meaning['B'] = 14;
meaning['C'] = 2;
meaning['D'] = 13;
meaning['G'] = 4;
meaning['H'] = 11;
meaning['K'] = 12;
meaning['M'] = 3;
meaning['N'] = 15;
meaning['O'] = 15;
meaning['R'] = 5;
meaning['S'] = 6;
meaning['T'] = 8;
meaning['U'] = 8;
meaning['V'] = 7;
meaning['W'] = 9;
meaning['X'] = 15;
meaning['Y'] = 10;
meaning['?'] = 15;
meaning['-'] = 15;
basesread = basesnew = 0;
allread = FALSE;
firstpass = TRUE;
ch = ' ';
while (! allread) {
for (i = 1; i <= tr->mxtips; i++) { /* Read data line */
if (firstpass) { /* Read species names */
j = 1;
while (whitechar(ch = getc(INFILE))) { /* Skip blank lines */
if (ch == '\n') j = 1; else j++;
}
if (j > nmlngth) {
printf("ERROR: Blank name for species %d; ", i);
printf("check number of species,\n");
printf(" number of sites, and interleave option.\n");
return FALSE;
}
nameptr = tr->nodep[i]->name;
for (k = 1; k < j; k++) *nameptr++ = ' ';
while (ch != '\n' && ch != EOF) {
if (whitechar(ch)) ch = ' ';
*nameptr++ = ch;
if (++j > nmlngth) break;
ch = getc(INFILE);
}
while (*(--nameptr) == ' ') ; /* remove trailing blanks */
*(++nameptr) = '\0'; /* add null termination */
if (ch == EOF) {
printf("ERROR: End-of-file in name of species %d\n", i);
return FALSE;
}
} /* if (firstpass) */
j = basesread;
while ((j < rdta->sites)
&& ((ch = getc(INFILE)) != EOF)
&& ((! adef->interleaved) || (ch != '\n'))) {
uppercase(& ch);
if (meaning[ch] || ch == '.') {
j++;
if (ch == '.') {
if (i != 1) ch = rdta->y[1][j];
else {
printf("ERROR: Dot (.) found at site %d of sequence 1\n", j);
return FALSE;
}
}
rdta->y[i][j] = ch;
}
else if (whitechar(ch) || digitchar(ch)) ;
else {
printf("ERROR: Bad base (%c) at site %d of sequence %d\n",
ch, j, i);
return FALSE;
}
}
if (ch == EOF) {
printf("ERROR: End-of-file at site %d of sequence %d\n", j, i);
return FALSE;
}
if (! firstpass && (j == basesread)) i--; /* no data on line */
else if (i == 1) basesnew = j;
else if (j != basesnew) {
printf("ERROR: Sequences out of alignment\n");
printf("%d (instead of %d) residues read in sequence %d\n",
j - basesread, basesnew - basesread, i);
return FALSE;
}
while (ch != '\n' && ch != EOF) ch = getc(INFILE); /* flush line */
} /* next sequence */
firstpass = FALSE;
basesread = basesnew;
allread = (basesread >= rdta->sites);
}
/* Print listing of sequence alignment */
if (adef->prdata) {
j = nmlngth - 5 + ((rdta->sites + ((rdta->sites - 1)/10))/2);
if (j < nmlngth - 1) j = nmlngth - 1;
if (j > 37) j = 37;
printf("Name"); for (i=1;i<=j;i++) putchar(' '); printf("Sequences\n");
printf("----"); for (i=1;i<=j;i++) putchar(' '); printf("---------\n");
putchar('\n');
for (i = 1; i <= rdta->sites; i += 60) {
l = i + 59;
if (l > rdta->sites) l = rdta->sites;
if (adef->userwgt) {
printf("Weights ");
for (j = 11; j <= nmlngth+3; j++) putchar(' ');
for (k = i; k <= l; k++) {
putchar(itobase36(rdta->wgt[k]));
if (((k % 10) == 0) && (k < l)) putchar(' ');
}
putchar('\n');
}
if (rdta->categs > 1) {
printf("Categories");
for (j = 11; j <= nmlngth+3; j++) putchar(' ');
for (k = i; k <= l; k++) {
putchar(itobase36(rdta->sitecat[k]));
if (((k % 10) == 0) && (k < l)) putchar(' ');
}
putchar('\n');
}
for (j = 1; j <= tr->mxtips; j++) {
nameptr = tr->nodep[j]->name;
k = nmlngth+3;
while (ch = *nameptr++) {putchar(ch); k--;}
while (--k >= 0) putchar(' ');
for (k = i; k <= l; k++) {
ch = rdta->y[j][k];
if ((j > 1) && (ch == rdta->y[1][k])) ch = '.';
putchar(ch);
if (((k % 10) == 0) && (k < l)) putchar(' ');
}
putchar('\n');
}
putchar('\n');
}
}
for (j = 1; j <= tr->mxtips; j++) /* Convert characters to meanings */
for (i = 1; i <= rdta->sites; i++) {
rdta->y[j][i] = meaning[rdta->y[j][i]];
}
return TRUE;
} /* getdata */
boolean getntrees (adef)
analdef *adef;
{ /* getntrees */
if (fscanf(INFILE, "%d", &(adef->numutrees)) != 1 || findch('\n') == EOF) {
printf("ERROR: Problem reading number of user trees\n");
return FALSE;
}
if (adef->nkeep == 0) adef->nkeep = adef->numutrees;
return TRUE;
} /* getntrees */
boolean getinput (adef, rdta, cdta, tr)
analdef *adef;
rawDNA *rdta;
crunchedDNA *cdta;
tree *tr;
{ /* getinput */
if (! getnums(rdta)) return FALSE;
if (! getoptions(adef, rdta, cdta, tr)) return FALSE;
if (! adef->empf && ! getbasefreqs(rdta)) return FALSE;
if (! getyspace(rdta)) return FALSE;
if (! setupTree(tr, rdta->sites)) return FALSE;
if (! getdata(adef, rdta, tr)) return FALSE;
if (adef->usertree && ! getntrees(adef)) return FALSE;
return TRUE;
} /* getinput */
void makeboot (adef, rdta, cdta)
analdef *adef;
rawDNA *rdta;
crunchedDNA *cdta;
{ /* makeboot */
int i, j, nonzero;
double randum();
nonzero = 0;
for (i = 1; i <= rdta->sites; i++) if (rdta->wgt[i] > 0) nonzero++;
for (j = 1; j <= nonzero; j++) cdta->aliaswgt[j] = 0;
for (j = 1; j <= nonzero; j++)
cdta->aliaswgt[(int) (nonzero*randum(& adef->boot)) + 1]++;
j = 0;
cdta->wgtsum = 0;
for (i = 1; i <= rdta->sites; i++) {
if (rdta->wgt[i] > 0)
cdta->wgtsum += (rdta->wgt2[i] = rdta->wgt[i] * cdta->aliaswgt[++j]);
else
rdta->wgt2[i] = 0;
}
} /* makeboot */
void sitesort (rdta, cdta)
rawDNA *rdta;
crunchedDNA *cdta;
/* Shell sort keeping sites, weights in same order */
{ /* sitesort */
int gap, i, j, jj, jg, k;
boolean flip, tied;
for (gap = rdta->sites / 2; gap > 0; gap /= 2) {
for (i = gap + 1; i <= rdta->sites; i++) {
j = i - gap;
do {
jj = cdta->alias[j];
jg = cdta->alias[j+gap];
flip = (rdta->sitecat[jj] > rdta->sitecat[jg]);
tied = (rdta->sitecat[jj] == rdta->sitecat[jg]);
for (k = 1; (k <= rdta->numsp) && tied; k++) {
flip = (rdta->y[k][jj] > rdta->y[k][jg]);
tied = (rdta->y[k][jj] == rdta->y[k][jg]);
}
if (flip) {
cdta->alias[j] = jg;
cdta->alias[j+gap] = jj;
j -= gap;
}
} while (flip && (j > 0));
} /* for (i ... */
} /* for (gap ... */
} /* sitesort */
void sitecombcrunch (rdta, cdta)
rawDNA *rdta;
crunchedDNA *cdta;
/* combine sites that have identical patterns (and nonzero weight) */
{ /* sitecombcrunch */
int i, sitei, j, sitej, k;
boolean tied;
i = 0;
cdta->alias[0] = cdta->alias[1];
cdta->aliaswgt[0] = 0;
for (j = 1; j <= rdta->sites; j++) {
sitei = cdta->alias[i];
sitej = cdta->alias[j];
tied = (rdta->sitecat[sitei] == rdta->sitecat[sitej]);
for (k = 1; tied && (k <= rdta->numsp); k++)
tied = (rdta->y[k][sitei] == rdta->y[k][sitej]);
if (tied) {
cdta->aliaswgt[i] += rdta->wgt2[sitej];
}
else {
if (cdta->aliaswgt[i] > 0) i++;
cdta->aliaswgt[i] = rdta->wgt2[sitej];
cdta->alias[i] = sitej;
}
}
cdta->endsite = i;
if (cdta->aliaswgt[i] > 0) cdta->endsite++;
} /* sitecombcrunch */
boolean makeweights (adef, rdta, cdta)
analdef *adef;
rawDNA *rdta;
crunchedDNA *cdta;
/* make up weights vector to avoid duplicate computations */
{ /* makeweights */
int i;
if (adef->boot) makeboot(adef, rdta, cdta);
else for (i = 1; i <= rdta->sites; i++) rdta->wgt2[i] = rdta->wgt[i];
for (i = 1; i <= rdta->sites; i++) cdta->alias[i] = i;
sitesort(rdta, cdta);
sitecombcrunch(rdta, cdta);
printf("Total weight of positions in analysis = %d\n", cdta->wgtsum);
printf("There are %d distinct data patterns (columns)\n\n", cdta->endsite);
return TRUE;
} /* makeweights */
boolean makevalues (rdta, cdta)
rawDNA *rdta;
crunchedDNA *cdta;
/* set up fractional likelihoods at tips */
{ /* makevalues */
double temp, wtemp;
int i, j;
for (i = 1; i <= rdta->numsp; i++) { /* Pack and move tip data */
for (j = 0; j < cdta->endsite; j++) {
rdta->y[i-1][j] = rdta->y[i][cdta->alias[j]];
}
}
for (j = 0; j < cdta->endsite; j++) {
cdta->patcat[j] = i = rdta->sitecat[cdta->alias[j]];
cdta->patrat[j] = temp = rdta->catrat[i];
cdta->wr[j] = wtemp = temp * cdta->aliaswgt[j];
cdta->wr2[j] = temp * wtemp;
}
return TRUE;
} /* makevalues */
boolean empiricalfreqs (rdta, cdta)
rawDNA *rdta;
crunchedDNA *cdta;
/* Get empirical base frequencies from the data */
{ /* empiricalfreqs */
double sum, suma, sumc, sumg, sumt, wj, fa, fc, fg, ft;
int i, j, k, code;
yType *yptr;
rdta->freqa = 0.25;
rdta->freqc = 0.25;
rdta->freqg = 0.25;
rdta->freqt = 0.25;
for (k = 1; k <= 8; k++) {
suma = 0.0;
sumc = 0.0;
sumg = 0.0;
sumt = 0.0;
for (i = 0; i < rdta->numsp; i++) {
yptr = rdta->y[i];
for (j = 0; j < cdta->endsite; j++) {
code = *yptr++;
fa = rdta->freqa * ( code & 1);
fc = rdta->freqc * ((code >> 1) & 1);
fg = rdta->freqg * ((code >> 2) & 1);
ft = rdta->freqt * ((code >> 3) & 1);
wj = cdta->aliaswgt[j] / (fa + fc + fg + ft);
suma += wj * fa;
sumc += wj * fc;
sumg += wj * fg;
sumt += wj * ft;
}
}
sum = suma + sumc + sumg + sumt;
rdta->freqa = suma / sum;
rdta->freqc = sumc / sum;
rdta->freqg = sumg / sum;
rdta->freqt = sumt / sum;
}
return TRUE;
} /* empiricalfreqs */
void reportfreqs (adef, rdta)
analdef *adef;
rawDNA *rdta;
{ /* reportfreqs */
double suma, sumb;
if (adef->empf) printf("Empirical ");
printf("Base Frequencies:\n\n");
printf(" A %10.5f\n", rdta->freqa);
printf(" C %10.5f\n", rdta->freqc);
printf(" G %10.5f\n", rdta->freqg);
printf(" T(U) %10.5f\n\n", rdta->freqt);
rdta->freqr = rdta->freqa + rdta->freqg;
rdta->invfreqr = 1.0/rdta->freqr;
rdta->freqar = rdta->freqa * rdta->invfreqr;
rdta->freqgr = rdta->freqg * rdta->invfreqr;
rdta->freqy = rdta->freqc + rdta->freqt;
rdta->invfreqy = 1.0/rdta->freqy;
rdta->freqcy = rdta->freqc * rdta->invfreqy;
rdta->freqty = rdta->freqt * rdta->invfreqy;
printf("Transition/transversion ratio = %10.6f\n\n", rdta->ttratio);
suma = rdta->ttratio*rdta->freqr*rdta->freqy
- (rdta->freqa*rdta->freqg + rdta->freqc*rdta->freqt);
sumb = rdta->freqa*rdta->freqgr + rdta->freqc*rdta->freqty;
rdta->xi = suma/(suma+sumb);
rdta->xv = 1.0 - rdta->xi;
if (rdta->xi <= 0.0) {
printf("WARNING: This transition/transversion ratio\n");
printf(" is impossible with these base frequencies!\n");
printf("Transition/transversion parameter reset\n\n");
rdta->xi = 0.000001;
rdta->xv = 1.0 - rdta->xi;
rdta->ttratio = (sumb * rdta->xi / rdta->xv
+ rdta->freqa * rdta->freqg
+ rdta->freqc * rdta->freqt)
/ (rdta->freqr * rdta->freqy);
printf("Transition/transversion ratio = %10.6f\n\n", rdta->ttratio);
}
printf("(Transition/transversion parameter = %10.6f)\n\n",
rdta->xi/rdta->xv);
rdta->fracchange = 2.0 * rdta->xi * (rdta->freqa * rdta->freqgr
+ rdta->freqc * rdta->freqty)
+ rdta->xv * (1.0 - rdta->freqa * rdta->freqa
- rdta->freqc * rdta->freqc
- rdta->freqg * rdta->freqg
- rdta->freqt * rdta->freqt);
} /* reportfreqs */
boolean linkdata2tree (rdta, cdta, tr)
rawDNA *rdta;
crunchedDNA *cdta;
tree *tr;
/* Link data array to the tree tips */
{ /* linkdata2tree */
int i;
for (i = 1; i <= tr->mxtips; i++) { /* Associate data with tips */
tr->nodep[i]->tip = &(rdta->y[i-1][0]);
}
tr->rdta = rdta;
tr->cdta = cdta;
return TRUE;
} /* linkdata2tree */
xarray *setupxarray (npat)
int npat;
{ /* setupxarray */
xarray *x;
xtype *data;
x = (xarray *) Malloc(sizeof(xarray));
if (x) {
data = (xtype *) Malloc(4 * npat * sizeof(xtype));
if (data) {
x->a = data;
x->c = data += npat;
x->g = data += npat;
x->t = data + npat;
x->prev = x->next = x;
x->owner = (node *) NULL;
}
else {
Free(x);
return (xarray *) NULL;
}
}
return x;
} /* setupxarray */
boolean linkxarray (req, min, npat, freexptr, usedxptr)
int req, min, npat;
xarray **freexptr, **usedxptr;
/* Link a set of xarrays */
{ /* linkxarray */
xarray *first, *prev, *x;
int i;
first = prev = (xarray *) NULL;
i = 0;
do {
x = setupxarray(npat);
if (x) {
if (! first) first = x;
else {
prev->next = x;
x->prev = prev;
}
prev = x;
i++;
}
else {
printf("ERROR: Failure to get requested xarray memory\n");
if (i < min) return FALSE;
}
} while ((i < req) && x);
if (first) {
first->prev = prev;
prev->next = first;
}
*freexptr = first;
*usedxptr = (xarray *) NULL;
return TRUE;
} /* linkxarray */
boolean setupnodex (tr)
tree *tr;
{ /* setupnodex */
nodeptr p;
int i;
for (i = tr->mxtips + 1; (i <= 2*(tr->mxtips) - 2); i++) {
p = tr->nodep[i];
if (! (p->x = setupxarray(tr->cdta->endsite))) {
printf("ERROR: Failure to get internal node xarray memory\n");
return FALSE;
}
}
return TRUE;
} /* setupnodex */
xarray *getxtip (p)
nodeptr p;
{ /* getxtip */
xarray *new;
boolean splice;
if (! p) return (xarray *) NULL;
splice = FALSE;
if (p->x) { /* array is there; move to tail of list */
new = p->x;
if (new == new->prev) ; /* linked to self; leave it */
else if (new == usedxtip) usedxtip = usedxtip->next; /* at head */
else if (new == usedxtip->prev) ; /* already at tail */
else { /* move to tail of list */
new->prev->next = new->next;
new->next->prev = new->prev;
splice = TRUE;
}
}
else if (freextip) { /* take from unused list */
p->x = new = freextip;
new->owner = p;
if (new->prev != new) { /* not only member of freelist */
new->prev->next = new->next;
new->next->prev = new->prev;
freextip = new->next;
}
else
freextip = (xarray *) NULL;
splice = TRUE;
}
else if (usedxtip) { /* take from head of used list */
usedxtip->owner->x = (xarray *) NULL;
p->x = new = usedxtip;
new->owner = p;
usedxtip = usedxtip->next;
}
else {
printf("ERROR: Unable to locate memory for tip %d.\n", p->number);
return (xarray *) NULL;
}
if (splice) {
if (usedxtip) { /* list is not empty */
usedxtip->prev->next = new;
new->prev = usedxtip->prev;
usedxtip->prev = new;
new->next = usedxtip;
}
else
usedxtip = new->prev = new->next = new;
}
return new;
} /* getxtip */
xarray *getxnode (p)
nodeptr p;
/* Ensure that internal node p has memory */
{ /* getxnode */
nodeptr s;
if (! (p->x)) { /* Move likelihood array on this node to sector p */
if ((s = p->next)->x || (s = s->next)->x) {
p->x = s->x;
s->x = (xarray *) NULL;
}
else {
printf("ERROR: Unable to locate memory at node %d.\n", p->number);
exit(1);
}
}
return p->x;
} /* getxnode */
boolean newview (tr, p) /* Update likelihoods at node */
tree *tr;
nodeptr p;
{ /* newview */
double z1, lz1, xvlz1, z2, lz2, xvlz2;
nodeptr q, r;
xtype *x1a, *x1c, *x1g, *x1t, *x2a, *x2c, *x2g, *x2t,
*x3a, *x3c, *x3g, *x3t;
int i;
if (p->tip) { /* Make sure that data are at tip */
int code;
yType *yptr;
if (p->x) return TRUE; /* They are already there */
if (! getxtip(p)) return FALSE; /* They are not, so get memory */
x3a = &(p->x->a[0]); /* Move tip data to xarray */
x3c = &(p->x->c[0]);
x3g = &(p->x->g[0]);
x3t = &(p->x->t[0]);
yptr = p->tip;
for (i = 0; i < tr->cdta->endsite; i++) {
code = *yptr++;
*x3a++ = code & 1;
*x3c++ = (code >> 1) & 1;
*x3g++ = (code >> 2) & 1;
*x3t++ = (code >> 3) & 1;
}
return TRUE;
}
/* Internal node needs update */
q = p->next->back;
r = p->next->next->back;
while ((! p->x) || (! q->x) || (! r->x)) {
if (! q->x) if (! newview(tr, q)) return FALSE;
if (! r->x) if (! newview(tr, r)) return FALSE;
if (! p->x) if (! getxnode(p)) return FALSE;
}
x1a = &(q->x->a[0]);
x1c = &(q->x->c[0]);
x1g = &(q->x->g[0]);
x1t = &(q->x->t[0]);
z1 = q->z;
lz1 = (z1 > zmin) ? log(z1) : log(zmin);
xvlz1 = tr->rdta->xv * lz1;
x2a = &(r->x->a[0]);
x2c = &(r->x->c[0]);
x2g = &(r->x->g[0]);
x2t = &(r->x->t[0]);
z2 = r->z;
lz2 = (z2 > zmin) ? log(z2) : log(zmin);
xvlz2 = tr->rdta->xv * lz2;
x3a = &(p->x->a[0]);
x3c = &(p->x->c[0]);
x3g = &(p->x->g[0]);
x3t = &(p->x->t[0]);
{ double zz1table[maxcategories+1], zv1table[maxcategories+1],
zz2table[maxcategories+1], zv2table[maxcategories+1],
*zz1ptr, *zv1ptr, *zz2ptr, *zv2ptr, *rptr;
double fx1r, fx1y, fx1n, suma1, sumg1, sumc1, sumt1,
fx2r, fx2y, fx2n, ki, tempi, tempj;
int *cptr;
# ifdef Vectorize
double zz1[maxpatterns], zv1[maxpatterns],
zz2[maxpatterns], zv2[maxpatterns];
int cat;
# else
double zz1, zv1, zz2, zv2;
int cat;
# endif
rptr = &(tr->rdta->catrat[1]);
zz1ptr = &(zz1table[1]);
zv1ptr = &(zv1table[1]);
zz2ptr = &(zz2table[1]);
zv2ptr = &(zv2table[1]);
# ifdef Vectorize
# pragma IVDEP
# endif
for (i = 1; i <= tr->rdta->categs; i++) { /* exps for each category */
ki = *rptr++;
*zz1ptr++ = exp(ki * lz1);
*zv1ptr++ = exp(ki * xvlz1);
*zz2ptr++ = exp(ki * lz2);
*zv2ptr++ = exp(ki * xvlz2);
}
cptr = &(tr->cdta->patcat[0]);
# ifdef Vectorize
# pragma IVDEP
for (i = 0; i < tr->cdta->endsite; i++) {
cat = *cptr++;
zz1[i] = zz1table[cat];
zv1[i] = zv1table[cat];
zz2[i] = zz2table[cat];
zv2[i] = zv2table[cat];
}
# pragma IVDEP
for (i = 0; i < tr->cdta->endsite; i++) {
fx1r = tr->rdta->freqa * *x1a + tr->rdta->freqg * *x1g;
fx1y = tr->rdta->freqc * *x1c + tr->rdta->freqt * *x1t;
fx1n = fx1r + fx1y;
tempi = fx1r * tr->rdta->invfreqr;
tempj = zv1[i] * (tempi-fx1n) + fx1n;
suma1 = zz1[i] * (*x1a++ - tempi) + tempj;
sumg1 = zz1[i] * (*x1g++ - tempi) + tempj;
tempi = fx1y * tr->rdta->invfreqy;
tempj = zv1[i] * (tempi-fx1n) + fx1n;
sumc1 = zz1[i] * (*x1c++ - tempi) + tempj;
sumt1 = zz1[i] * (*x1t++ - tempi) + tempj;
fx2r = tr->rdta->freqa * *x2a + tr->rdta->freqg * *x2g;
fx2y = tr->rdta->freqc * *x2c + tr->rdta->freqt * *x2t;
fx2n = fx2r + fx2y;
tempi = fx2r * tr->rdta->invfreqr;
tempj = zv2[i] * (tempi-fx2n) + fx2n;
*x3a++ = suma1 * (zz2[i] * (*x2a++ - tempi) + tempj);
*x3g++ = sumg1 * (zz2[i] * (*x2g++ - tempi) + tempj);
tempi = fx2y * tr->rdta->invfreqy;
tempj = zv2[i] * (tempi-fx2n) + fx2n;
*x3c++ = sumc1 * (zz2[i] * (*x2c++ - tempi) + tempj);
*x3t++ = sumt1 * (zz2[i] * (*x2t++ - tempi) + tempj);
}
# else /* Not Vectorize */
for (i = 0; i < tr->cdta->endsite; i++) {
cat = *cptr++;
zz1 = zz1table[cat];
zv1 = zv1table[cat];
fx1r = tr->rdta->freqa * *x1a + tr->rdta->freqg * *x1g;
fx1y = tr->rdta->freqc * *x1c + tr->rdta->freqt * *x1t;
fx1n = fx1r + fx1y;
tempi = fx1r * tr->rdta->invfreqr;
tempj = zv1 * (tempi-fx1n) + fx1n;
suma1 = zz1 * (*x1a++ - tempi) + tempj;
sumg1 = zz1 * (*x1g++ - tempi) + tempj;
tempi = fx1y * tr->rdta->invfreqy;
tempj = zv1 * (tempi-fx1n) + fx1n;
sumc1 = zz1 * (*x1c++ - tempi) + tempj;
sumt1 = zz1 * (*x1t++ - tempi) + tempj;
zz2 = zz2table[cat];
zv2 = zv2table[cat];
fx2r = tr->rdta->freqa * *x2a + tr->rdta->freqg * *x2g;
fx2y = tr->rdta->freqc * *x2c + tr->rdta->freqt * *x2t;
fx2n = fx2r + fx2y;
tempi = fx2r * tr->rdta->invfreqr;
tempj = zv2 * (tempi-fx2n) + fx2n;
*x3a++ = suma1 * (zz2 * (*x2a++ - tempi) + tempj);
*x3g++ = sumg1 * (zz2 * (*x2g++ - tempi) + tempj);
tempi = fx2y * tr->rdta->invfreqy;
tempj = zv2 * (tempi-fx2n) + fx2n;
*x3c++ = sumc1 * (zz2 * (*x2c++ - tempi) + tempj);
*x3t++ = sumt1 * (zz2 * (*x2t++ - tempi) + tempj);
}
# endif /* Vectorize or not */
return TRUE;
}
} /* newview */
double evaluate (tr, p)
tree *tr;
nodeptr p;
{ /* evaluate */
double sum, z, lz, xvlz,
ki, fx1a, fx1c, fx1g, fx1t, fx1r, fx1y, fx2r, fx2y,
suma, sumb, sumc, term;
# ifdef Vectorize
double zz[maxpatterns], zv[maxpatterns];
# else
double zz,zv;
# endif
double zztable[maxcategories+1], zvtable[maxcategories+1],
*zzptr, *zvptr;
double *log_f, *rptr;
xtype *x1a, *x1c, *x1g, *x1t, *x2a, *x2c, *x2g, *x2t;
nodeptr q;
int cat, *cptr, i, *wptr;
q = p->back;
while ((! p->x) || (! q->x)) {
if (! (p->x)) if (! newview(tr, p)) return badEval;
if (! (q->x)) if (! newview(tr, q)) return badEval;
}
x1a = &(p->x->a[0]);
x1c = &(p->x->c[0]);
x1g = &(p->x->g[0]);
x1t = &(p->x->t[0]);
x2a = &(q->x->a[0]);
x2c = &(q->x->c[0]);
x2g = &(q->x->g[0]);
x2t = &(q->x->t[0]);
z = p->z;
if (z < zmin) z = zmin;
lz = log(z);
xvlz = tr->rdta->xv * lz;
rptr = &(tr->rdta->catrat[1]);
zzptr = &(zztable[1]);
zvptr = &(zvtable[1]);
# ifdef Vectorize
# pragma IVDEP
# endif
for (i = 1; i <= tr->rdta->categs; i++) {
ki = *rptr++;
*zzptr++ = exp(ki * lz);
*zvptr++ = exp(ki * xvlz);
}
wptr = &(tr->cdta->aliaswgt[0]);
cptr = &(tr->cdta->patcat[0]);
log_f = tr->log_f;
tr->log_f_valid = tr->cdta->endsite;
sum = 0.0;
# ifdef Vectorize
# pragma IVDEP
for (i = 0; i < tr->cdta->endsite; i++) {
cat = *cptr++;
zz[i] = zztable[cat];
zv[i] = zvtable[cat];
}
# pragma IVDEP
for (i = 0; i < tr->cdta->endsite; i++) {
fx1a = tr->rdta->freqa * *x1a++;
fx1g = tr->rdta->freqg * *x1g++;
fx1c = tr->rdta->freqc * *x1c++;
fx1t = tr->rdta->freqt * *x1t++;
suma = fx1a * *x2a + fx1c * *x2c + fx1g * *x2g + fx1t * *x2t;
fx2r = tr->rdta->freqa * *x2a++ + tr->rdta->freqg * *x2g++;
fx2y = tr->rdta->freqc * *x2c++ + tr->rdta->freqt * *x2t++;
fx1r = fx1a + fx1g;
fx1y = fx1c + fx1t;
sumc = (fx1r + fx1y) * (fx2r + fx2y);
sumb = fx1r * fx2r * tr->rdta->invfreqr + fx1y * fx2y * tr->rdta->invfreqy;
suma -= sumb;
sumb -= sumc;
term = log(zz[i] * suma + zv[i] * sumb + sumc);
sum += *wptr++ * term;
*log_f++ = term;
}
# else /* Not Vectorize */
for (i = 0; i < tr->cdta->endsite; i++) {
cat = *cptr++;
zz = zztable[cat];
zv = zvtable[cat];
fx1a = tr->rdta->freqa * *x1a++;
fx1g = tr->rdta->freqg * *x1g++;
fx1c = tr->rdta->freqc * *x1c++;
fx1t = tr->rdta->freqt * *x1t++;
suma = fx1a * *x2a + fx1c * *x2c + fx1g * *x2g + fx1t * *x2t;
fx2r = tr->rdta->freqa * *x2a++ + tr->rdta->freqg * *x2g++;
fx2y = tr->rdta->freqc * *x2c++ + tr->rdta->freqt * *x2t++;
fx1r = fx1a + fx1g;
fx1y = fx1c + fx1t;
sumc = (fx1r + fx1y) * (fx2r + fx2y);
sumb = fx1r * fx2r * tr->rdta->invfreqr + fx1y * fx2y * tr->rdta->invfreqy;
suma -= sumb;
sumb -= sumc;
term = log(zz * suma + zv * sumb + sumc);
sum += *wptr++ * term;
*log_f++ = term;
}
# endif /* Vectorize or not */
tr->likelihood = sum;
return sum;
} /* evaluate */
double makenewz (tr, p, q, z0, maxiter)
tree *tr;
nodeptr p, q;
double z0;
int maxiter;
{ /* makenewz */
double *abi, *bci, *sumci, *abptr, *bcptr, *sumcptr;
double dlnLidlz, dlnLdlz, d2lnLdlz2, z, zprev, zstep, lz, xvlz,
ki, suma, sumb, sumc, ab, bc, inv_Li, t1, t2,
fx1a, fx1c, fx1g, fx1t, fx1r, fx1y, fx2r, fx2y;
double zztable[maxcategories+1], zvtable[maxcategories+1];
double *zzptr, *zvptr;
double *rptr, *wrptr, *wr2ptr;
xtype *x1a, *x1c, *x1g, *x1t, *x2a, *x2c, *x2g, *x2t;
int cat, *cptr, i, curvatOK;
while ((! p->x) || (! q->x)) {
if (! (p->x)) if (! newview(tr, p)) return badZ;
if (! (q->x)) if (! newview(tr, q)) return badZ;
}
x1a = &(p->x->a[0]);
x1c = &(p->x->c[0]);
x1g = &(p->x->g[0]);
x1t = &(p->x->t[0]);
x2a = &(q->x->a[0]);
x2c = &(q->x->c[0]);
x2g = &(q->x->g[0]);
x2t = &(q->x->t[0]);
{ unsigned scratch_size;
scratch_size = sizeof(double) * tr->cdta->endsite;
if ((abi = (double *) Malloc(scratch_size)) &&
(bci = (double *) Malloc(scratch_size)) &&
(sumci = (double *) Malloc(scratch_size))) ;
else {
printf("ERROR: makenewz unable to obtain space for arrays\n");
return badZ;
}
}
abptr = abi;
bcptr = bci;
sumcptr = sumci;
# ifdef Vectorize
# pragma IVDEP
# endif
for (i = 0; i < tr->cdta->endsite; i++) {
fx1a = tr->rdta->freqa * *x1a++;
fx1g = tr->rdta->freqg * *x1g++;
fx1c = tr->rdta->freqc * *x1c++;
fx1t = tr->rdta->freqt * *x1t++;
suma = fx1a * *x2a + fx1c * *x2c + fx1g * *x2g + fx1t * *x2t;
fx2r = tr->rdta->freqa * *x2a++ + tr->rdta->freqg * *x2g++;
fx2y = tr->rdta->freqc * *x2c++ + tr->rdta->freqt * *x2t++;
fx1r = fx1a + fx1g;
fx1y = fx1c + fx1t;
*sumcptr++ = sumc = (fx1r + fx1y) * (fx2r + fx2y);
sumb = fx1r * fx2r * tr->rdta->invfreqr + fx1y * fx2y * tr->rdta->invfreqy;
*abptr++ = suma - sumb;
*bcptr++ = sumb - sumc;
}
z = z0;
do {
zprev = z;
zstep = (1.0 - zmax) * z + zmin;
curvatOK = FALSE;
do {
if (z < zmin) z = zmin;
else if (z > zmax) z = zmax;
lz = log(z);
xvlz = tr->rdta->xv * lz;
rptr = &(tr->rdta->catrat[1]);
zzptr = &(zztable[1]);
zvptr = &(zvtable[1]);
# ifdef Vectorize
# pragma IVDEP
# endif
for (i = 1; i <= tr->rdta->categs; i++) {
ki = *rptr++;
*zzptr++ = exp(ki * lz);
*zvptr++ = exp(ki * xvlz);
}
abptr = abi;
bcptr = bci;
sumcptr = sumci;
cptr = &(tr->cdta->patcat[0]);
wrptr = &(tr->cdta->wr[0]);
wr2ptr = &(tr->cdta->wr2[0]);
dlnLdlz = 0.0; /* = d(ln(likelihood))/d(lz) */
d2lnLdlz2 = 0.0; /* = d2(ln(likelihood))/d(lz)2 */
# ifdef Vectorize
# pragma IVDEP
# endif
for (i = 0; i < tr->cdta->endsite; i++) {
cat = *cptr++; /* ratecategory(i) */
ab = *abptr++ * zztable[cat];
bc = *bcptr++ * zvtable[cat];
sumc = *sumcptr++;
inv_Li = 1.0/(ab + bc + sumc);
t1 = ab * inv_Li;
t2 = tr->rdta->xv * bc * inv_Li;
dlnLidlz = t1 + t2;
dlnLdlz += *wrptr++ * dlnLidlz;
d2lnLdlz2 += *wr2ptr++ * (t1 + tr->rdta->xv * t2 - dlnLidlz * dlnLidlz);
}
if ((d2lnLdlz2 >= 0.0) && (z < zmax))
zprev = z = 0.37 * z + 0.63; /* Bad curvature, shorten branch */
else
curvatOK = TRUE;
} while (! curvatOK);
if (d2lnLdlz2 < 0.0) {
z *= exp(-dlnLdlz / d2lnLdlz2);
if (z < zmin) z = zmin;
if (z > 0.25 * zprev + 0.75) /* Limit steps toward z = 1.0 */
z = 0.25 * zprev + 0.75;
}
if (z > zmax) z = zmax;
} while ((--maxiter > 0) && (ABS(z - zprev) > zstep));
Free(abi);
Free(bci);
Free(sumci);
return z;
} /* makenewz */
boolean update (tr, p)
tree *tr;
nodeptr p;
{ /* update */
nodeptr q;
double z0, z;
q = p->back;
z0 = q->z;
if ((z = makenewz(tr, p, q, z0, newzpercycle)) == badZ) return FALSE;
p->z = q->z = z;
if (ABS(z - z0) > deltaz) tr->smoothed = FALSE;
return TRUE;
} /* update */
boolean smooth (tr, p)
tree *tr;
nodeptr p;
{ /* smooth */
nodeptr q;
if (! update(tr, p)) return FALSE; /* Adjust branch */
if (! p->tip) { /* Adjust descendants */
q = p->next;
while (q != p) {
if (! smooth(tr, q->back)) return FALSE;
q = q->next;
}
# if ReturnSmoothedView
if (! newview(tr, p)) return FALSE;
# endif
}
return TRUE;
} /* smooth */
boolean smoothTree (tr, maxtimes)
tree *tr;
int maxtimes;
{ /* smoothTree */
nodeptr p, q;
p = tr->start;
while (--maxtimes >= 0) {
tr->smoothed = TRUE;
if (! smooth(tr, p->back)) return FALSE;
if (! p->tip) {
q = p->next;
while (q != p) {
if (! smooth(tr, q->back)) return FALSE;
q = q->next;
}
}
if (tr->smoothed) break;
}
return TRUE;
} /* smoothTree */
boolean localSmooth (tr, p, maxtimes) /* Smooth branches around p */
tree *tr;
nodeptr p;
int maxtimes;
{ /* localSmooth */
nodeptr q;
if (p->tip) return FALSE; /* Should be an error */
while (--maxtimes >= 0) {
tr->smoothed = TRUE;
q = p;
do {
if (! update(tr, q)) return FALSE;
q = q->next;
} while (q != p);
if (tr->smoothed) break;
}
tr->smoothed = FALSE; /* Only smooth locally */
return TRUE;
} /* localSmooth */
void hookup (p, q, z)
nodeptr p, q;
double z;
{ /* hookup */
p->back = q;
q->back = p;
p->z = q->z = z;
} /* hookup */
boolean insert (tr, p, q, glob) /* Insert node p into branch q <-> q->back */
tree *tr;
nodeptr p, q;
boolean glob; /* Smooth tree globally? */
/* q
/.
add/ .
/ .
pn .
s ---- p .remove
pnn .
\ .
add\ .
\. pn = p->next;
r pnn = p->next->next;
*/
{ /* insert */
nodeptr r, s;
r = q->back;
s = p->back;
# if BestInsertAverage && ! Master
{ double zqr, zqs, zrs, lzqr, lzqs, lzrs, lzsum, lzq, lzr, lzs, lzmax;
if ((zqr = makenewz(tr, q, r, q->z, iterations)) == badZ) return FALSE;
if ((zqs = makenewz(tr, q, s, defaultz, iterations)) == badZ) return FALSE;
if ((zrs = makenewz(tr, r, s, defaultz, iterations)) == badZ) return FALSE;
lzqr = (zqr > zmin) ? log(zqr) : log(zmin); /* long branches */
lzqs = (zqs > zmin) ? log(zqs) : log(zmin);
lzrs = (zrs > zmin) ? log(zrs) : log(zmin);
lzsum = 0.5 * (lzqr + lzqs + lzrs);
lzq = lzsum - lzrs;
lzr = lzsum - lzqs;
lzs = lzsum - lzqr;
lzmax = log(zmax);
if (lzq > lzmax) {lzq = lzmax; lzr = lzqr; lzs = lzqs;} /* short */
else if (lzr > lzmax) {lzr = lzmax; lzq = lzqr; lzs = lzrs;}
else if (lzs > lzmax) {lzs = lzmax; lzq = lzqs; lzr = lzrs;}
hookup(p->next, q, exp(lzq));
hookup(p->next->next, r, exp(lzr));
hookup(p, s, exp(lzs));
}
# else
{ double z;
z = sqrt(q->z);
hookup(p->next, q, z);
hookup(p->next->next, r, z);
}
# endif
if (! newview(tr, p)) return FALSE; /* So that p is valid at update */
tr->opt_level = 0;
# if ! Master /* Smoothings are done by slave */
if (glob) { /* Smooth whole tree */
if (! smoothTree(tr, smoothings)) return FALSE;
}
else { /* Smooth locale of p */
if (! localSmooth(tr, p, smoothings)) return FALSE;
}
# else
tr->likelihood = unlikely;
# endif
return TRUE;
} /* insert */
nodeptr removeNode (tr, p)
tree *tr;
nodeptr p;
/* q
.|
remove. |
. |
pn |
s ---- p |add
pnn |
. |
remove. |
.| pn = p->next;
r pnn = p->next->next;
*/
/* remove p and return where it was */
{ /* removeNode */
double zqr;
nodeptr q, r;
q = p->next->back;
r = p->next->next->back;
zqr = q->z * r->z;
# if ! Master
if ((zqr = makenewz(tr, q, r, zqr, iterations)) == badZ) return (node *) NULL;
# endif
hookup(q, r, zqr);
p->next->next->back = p->next->back = (node *) NULL;
return q;
} /* removeNode */
boolean initrav (tr, p)
tree *tr;
nodeptr p;
{ /* initrav */
nodeptr q;
if (! p->tip) {
q = p->next;
do {
if (! initrav(tr, q->back)) return FALSE;
q = q->next;
} while (q != p);
if (! newview(tr, p)) return FALSE;
}
return TRUE;
} /* initrav */
nodeptr buildNewTip (tr, p)
tree *tr;
nodeptr p;
{ /* buildNewTip */
nodeptr q;
q = tr->nodep[(tr->nextnode)++];
hookup(p, q, defaultz);
return q;
} /* buildNewTip */
boolean buildSimpleTree (tr, ip, iq, ir)
tree *tr;
int ip, iq, ir;
{ /* buildSimpleTree */
/* p, q and r are tips meeting at s */
nodeptr p, s;
int i;
i = MIN(ip, iq);
if (ir < i) i = ir;
tr->start = tr->nodep[i];
tr->ntips = 3;
p = tr->nodep[ip];
hookup(p, tr->nodep[iq], defaultz);
s = buildNewTip(tr, tr->nodep[ir]);
return insert(tr, s, p, FALSE); /* Smoothing is local to s */
} /* buildSimpleTree */
#ifndef HasStrLib
char * strchr (str, chr)
char *str;
int chr;
{ /* strchr */
int c;
while (c = *str) {if (c == chr) return str; str++;}
return (char *) NULL;
} /* strchr */
char * strstr (str1, str2)
char *str1, *str2;
{ /* strstr */
char *s1, *s2;
int c;
while (*(s1 = str1)) {
s2 = str2;
do {
if (! (c = *s2++)) return str1;
}
while (*s1++ == c);
str1++;
}
return (char *) NULL;
} /* strstr */
#endif
boolean readKeyValue (string, key, format, value)
char *string, *key, *format;
void *value;
{ /* readKeyValue */
if (! (string = (char*)strstr(string, key))) return FALSE;
string += strlen(key);
if (! (string = (char*)strchr(string, '='))) return FALSE;
string++;
return sscanf(string, format, value); /* 1 if read, otherwise 0 */
} /* readKeyValue */
#if Master || Slave
double str_readTreeLikelihood (treestr)
char *treestr;
{ /* str_readTreeLikelihood */
double lk1;
char *com, *com_end;
boolean readKeyValue();
if ((com = strchr(treestr, '[')) && (com < strchr(treestr, '('))
&& (com_end = strchr(com, ']'))) {
com++;
*com_end = 0;
if (readKeyValue(com, likelihood_key, "%lg", (void *) &(lk1))) {
*com_end = ']';
return lk1;
}
}
fprintf(stderr, "ERROR reading likelihood in receiveTree\n");
return badEval;
} /* str_readTreeLikelihood */
boolean sendTree (comm, tr)
comm_block *comm;
tree *tr;
{ /* sendTree */
char *treestr;
char *treeString();
# if Master
void sendTreeNum();
# endif
comm->done_flag = tr->likelihood > 0.0;
if (comm->done_flag)
write_comm_msg(comm, NULL);
else {
treestr = (char *) Malloc((tr->ntips * (nmlngth+32)) + 256);
if (! treestr) {
fprintf(stderr, "sendTree: Malloc failure\n");
return 0;
}
# if Master
if (send_ahead >= MAX_SEND_AHEAD) {
double new_likelihood;
int n_to_get;
n_to_get = (send_ahead+1)/2;
sendTreeNum(n_to_get);
send_ahead -= n_to_get;
read_comm_msg(& comm_slave, treestr);
new_likelihood = str_readTreeLikelihood(treestr);
if (new_likelihood == badEval) return FALSE;
if (! best_tr_recv || (new_likelihood > best_lk_recv)) {
if (best_tr_recv) Free(best_tr_recv);
best_tr_recv = Malloc(strlen(treestr) + 1);
strcpy(best_tr_recv, treestr);
best_lk_recv = new_likelihood;
}
}
send_ahead++;
# endif /* End #if Master */
REPORT_SEND_TREE;
(void) treeString(treestr, tr, tr->start->back, 1);
write_comm_msg(comm, treestr);
Free(treestr);
}
return TRUE;
} /* sendTree */
boolean receiveTree (comm, tr)
comm_block *comm;
tree *tr;
{ /* receiveTree */
char *treestr;
boolean status;
boolean str_treeReadLen();
treestr = (char *) Malloc((tr->ntips * (nmlngth+32)) + 256);
if (! treestr) {
fprintf(stderr, "receiveTree: Malloc failure\n");
return 0;
}
read_comm_msg(comm, treestr);
if (comm->done_flag) {
tr->likelihood = 1.0;
status = TRUE;
}
else {
# if Master
if (best_tr_recv) {
if (str_readTreeLikelihood(treestr) < best_lk_recv) {
strcpy(treestr, best_tr_recv); /* Overwrite new tree with best */
}
Free(best_tr_recv);
best_tr_recv = NULL;
}
# endif /* End #if Master */
status = str_treeReadLen(treestr, tr);
}
Free(treestr);
return status;
} /* receiveTree */
void requestForWork ()
{ /* requestForWork */
p4_send(DNAML_REQUEST, DNAML_DISPATCHER_ID, NULL, 0);
} /* requestForWork */
#endif /* End #if Master || Slave */
#if Master
void sendTreeNum(n_to_get)
int n_to_get;
{ /* sendTreeNum */
char scr[512];
sprintf(scr, "%d", n_to_get);
p4_send(DNAML_NUM_TREE, DNAML_MERGER_ID, scr, strlen(scr)+1);
} /* sendTreeNum */
boolean getReturnedTrees (tr, bt, n_tree_sent)
tree *tr;
bestlist *bt;
int n_tree_sent; /* number of trees sent to slaves */
{ /* getReturnedTrees */
void sendTreeNum();
boolean receiveTree();
sendTreeNum(send_ahead);
send_ahead = 0;
if (! receiveTree(& comm_slave, tr)) return FALSE;
tr->smoothed = TRUE;
(void) saveBestTree(bt, tr);
return TRUE;
} /* getReturnedTrees */
#endif
void cacheZ (tr)
tree *tr;
{ /* cacheZ */
nodeptr p;
int nodes;
nodes = tr->mxtips + 3 * (tr->mxtips - 2);
p = tr->nodep[1];
while (nodes-- > 0) {p->z0 = p->z; p++;}
} /* cacheZ */
void restoreZ (tr)
tree *tr;
{ /* restoreZ */
nodeptr p;
int nodes;
nodes = tr->mxtips + 3 * (tr->mxtips - 2);
p = tr->nodep[1];
while (nodes-- > 0) {p->z = p->z0; p++;}
} /* restoreZ */
boolean testInsert (tr, p, q, bt, fast)
tree *tr;
nodeptr p, q;
bestlist *bt;
boolean fast;
{ /* testInsert */
double qz;
nodeptr r;
r = q->back; /* Save original connection */
qz = q->z;
if (! insert(tr, p, q, ! fast)) return FALSE;
# if ! Master
if (evaluate(tr, fast ? p->next->next : tr->start) == badEval) return FALSE;
(void) saveBestTree(bt, tr);
# else /* Master */
tr->likelihood = unlikely;
if (! sendTree(& comm_slave, tr)) return FALSE;
# endif
/* remove p from this branch */
hookup(q, r, qz);
p->next->next->back = p->next->back = (nodeptr) NULL;
if (! fast) { /* With fast add, other values are still OK */
restoreZ(tr); /* Restore branch lengths */
# if ! Master /* Regenerate x values */
if (! initrav(tr, p->back)) return FALSE;
if (! initrav(tr, q)) return FALSE;
if (! initrav(tr, r)) return FALSE;
# endif
}
return TRUE;
} /* testInsert */
int addTraverse (tr, p, q, mintrav, maxtrav, bt, fast)
tree *tr;
nodeptr p, q;
int mintrav, maxtrav;
bestlist *bt;
boolean fast;
{ /* addTraverse */
int tested, newtested;
tested = 0;
if (--mintrav <= 0) { /* Moved minimum distance? */
if (! testInsert(tr, p, q, bt, fast)) return badRear;
tested++;
}
if ((! q->tip) && (--maxtrav > 0)) { /* Continue traverse? */
newtested = addTraverse(tr, p, q->next->back,
mintrav, maxtrav, bt, fast);
if (newtested == badRear) return badRear;
tested += newtested;
newtested = addTraverse(tr, p, q->next->next->back,
mintrav, maxtrav, bt, fast);
if (newtested == badRear) return badRear;
tested += newtested;
}
return tested;
} /* addTraverse */
int rearrange (tr, p, mintrav, maxtrav, bt)
tree *tr;
nodeptr p;
int mintrav, maxtrav;
bestlist *bt;
/* rearranges the tree, globally or locally */
{ /* rearrange */
double p1z, p2z, q1z, q2z;
nodeptr p1, p2, q, q1, q2;
int tested, mintrav2, newtested;
tested = 0;
if (maxtrav < 1 || mintrav > maxtrav) return tested;
/* Moving subtree forward in tree. */
if (! p->tip) {
p1 = p->next->back;
p2 = p->next->next->back;
if (! p1->tip || ! p2->tip) {
p1z = p1->z;
p2z = p2->z;
if (! removeNode(tr, p)) return badRear;
cacheZ(tr);
if (! p1->tip) {
newtested = addTraverse(tr, p, p1->next->back,
mintrav, maxtrav, bt, FALSE);
if (newtested == badRear) return badRear;
tested += newtested;
newtested = addTraverse(tr, p, p1->next->next->back,
mintrav, maxtrav, bt, FALSE);
if (newtested == badRear) return badRear;
tested += newtested;
}
if (! p2->tip) {
newtested = addTraverse(tr, p, p2->next->back,
mintrav, maxtrav, bt, FALSE);
if (newtested == badRear) return badRear;
tested += newtested;
newtested = addTraverse(tr, p, p2->next->next->back,
mintrav, maxtrav, bt, FALSE);
if (newtested == badRear) return badRear;
tested += newtested;
}
hookup(p->next, p1, p1z); /* Restore original tree */
hookup(p->next->next, p2, p2z);
if (! (initrav(tr, tr->start)
&& initrav(tr, tr->start->back))) return badRear;
}
} /* if (! p->tip) */
/* Moving subtree backward in tree. Minimum move is 2 to avoid duplicates */
q = p->back;
if (! q->tip && maxtrav > 1) {
q1 = q->next->back;
q2 = q->next->next->back;
if (! q1->tip && (!q1->next->back->tip || !q1->next->next->back->tip) ||
! q2->tip && (!q2->next->back->tip || !q2->next->next->back->tip)) {
q1z = q1->z;
q2z = q2->z;
if (! removeNode(tr, q)) return badRear;
cacheZ(tr);
mintrav2 = mintrav > 2 ? mintrav : 2;
if (! q1->tip) {
newtested = addTraverse(tr, q, q1->next->back,
mintrav2 , maxtrav, bt, FALSE);
if (newtested == badRear) return badRear;
tested += newtested;
newtested = addTraverse(tr, q, q1->next->next->back,
mintrav2 , maxtrav, bt, FALSE);
if (newtested == badRear) return badRear;
tested += newtested;
}
if (! q2->tip) {
newtested = addTraverse(tr, q, q2->next->back,
mintrav2 , maxtrav, bt, FALSE);
if (newtested == badRear) return badRear;
tested += newtested;
newtested = addTraverse(tr, q, q2->next->next->back,
mintrav2 , maxtrav, bt, FALSE);
if (newtested == badRear) return badRear;
tested += newtested;
}
hookup(q->next, q1, q1z); /* Restore original tree */
hookup(q->next->next, q2, q2z);
if (! (initrav(tr, tr->start)
&& initrav(tr, tr->start->back))) return badRear;
}
} /* if (! q->tip && maxtrav > 1) */
/* Move other subtrees */
if (! p->tip) {
newtested = rearrange(tr, p->next->back, mintrav, maxtrav, bt);
if (newtested == badRear) return badRear;
tested += newtested;
newtested = rearrange(tr, p->next->next->back, mintrav, maxtrav, bt);
if (newtested == badRear) return badRear;
tested += newtested;
}
return tested;
} /* rearrange */
#ifdef NoGetPID
/* macintosh */
/* this is NOT define we want, but what Mac define fits all compilers !? */
long getpid(void)
{
return 0;
}
#endif
FILE *fopen_pid (filenm, mode, name_pid)
char *filenm, *mode, *name_pid;
{ /* fopen_pid */
(void) sprintf(name_pid, "%s.%d", filenm, getpid());
return fopen(name_pid, mode);
} /* fopen_pid */
#if DeleteCheckpointFile
void unlink_pid (filenm)
char *filenm;
{ /* unlink_pid */
char scr[512];
(void) sprintf(scr, "%s.%d", filenm, getpid());
unlink(scr);
} /* unlink_pid */
#endif
void writeCheckpoint (tr)
tree *tr;
{ /* writeCheckpoint */
char filename[128];
FILE *checkpointf;
void treeOut();
checkpointf = fopen_pid(checkpointname, "a", filename);
if (checkpointf) {
treeOut(checkpointf, tr, treeNewick);
(void) fclose(checkpointf);
}
} /* writeCheckpoint */
node * findAnyTip(p)
nodeptr p;
{ /* findAnyTip */
return p->tip ? p : findAnyTip(p->next->back);
} /* findAnyTip */
boolean optimize (tr, maxtrav, bt)
tree *tr;
int maxtrav;
bestlist *bt;
{ /* optimize */
nodeptr p;
int mintrav, tested;
if (tr->ntips < 4) return TRUE;
writeCheckpoint(tr); /* checkpoint the starting tree */
if (maxtrav > tr->ntips - 3) maxtrav = tr->ntips - 3;
if (maxtrav <= tr->opt_level) return TRUE;
printf(" Doing %s rearrangements\n",
(maxtrav == 1) ? "local" :
(maxtrav < tr->ntips - 3) ? "regional" : "global");
/* loop while tree gets better */
do {
(void) startOpt(bt, tr);
mintrav = tr->opt_level + 1;
/* rearrange must start from a tip or it will miss some trees */
p = findAnyTip(tr->start);
tested = rearrange(tr, p->back, mintrav, maxtrav, bt);
if (tested == badRear) return FALSE;
# if Master
if (! getReturnedTrees(tr, bt, tested)) return FALSE;
# endif
bt->numtrees += tested;
(void) setOptLevel(bt, maxtrav);
if (! recallBestTree(bt, 1, tr)) return FALSE; /* recover best tree */
printf(" Tested %d alternative trees\n", tested);
if (bt->improved) {
printf(" Ln Likelihood =%14.5f\n", tr->likelihood);
}
writeCheckpoint(tr); /* checkpoint the new tree */
} while (maxtrav > tr->opt_level);
return TRUE;
} /* optimize */
void coordinates (tr, p, lengthsum, tdptr)
tree *tr;
nodeptr p;
double lengthsum;
drawdata *tdptr;
{ /* coordinates */
/* establishes coordinates of nodes */
double x, z;
nodeptr q, first, last;
if (p->tip) {
p->xcoord = NINT(over * lengthsum);
p->ymax = p->ymin = p->ycoord = tdptr->tipy;
tdptr->tipy += down;
if (lengthsum > tdptr->tipmax) tdptr->tipmax = lengthsum;
}
else {
q = p->next;
do {
z = q->z;
if (z < zmin) z = zmin;
x = lengthsum - tr->rdta->fracchange * log(z);
coordinates(tr, q->back, x, tdptr);
q = q->next;
} while (p == tr->start->back ? q != p->next : q != p);
first = p->next->back;
q = p;
while (q->next != p) q = q->next;
last = q->back;
p->xcoord = NINT(over * lengthsum);
p->ycoord = (first->ycoord + last->ycoord)/2;
p->ymin = first->ymin;
p->ymax = last->ymax;
}
} /* coordinates */
void drawline (tr, i, scale)
tree *tr;
int i;
double scale;
/* draws one row of the tree diagram by moving up tree */
/* Modified to handle 1000 taxa, October 16, 1991 */
{ /* drawline */
nodeptr p, q, r, first, last;
int n, j, k, l, extra;
boolean done;
p = q = tr->start->back;
extra = 0;
if (i == p->ycoord) {
k = q->number - tr->mxtips;
for (j = k; j < 1000; j *= 10) putchar('-');
printf("%d", k);
extra = 1;
}
else printf(" ");
do {
if (! p->tip) {
r = p->next;
done = FALSE;
do {
if ((i >= r->back->ymin) && (i <= r->back->ymax)) {
q = r->back;
done = TRUE;
}
r = r->next;
} while (! done && (p == tr->start->back ? r != p->next : r != p));
first = p->next->back;
r = p;
while (r->next != p) r = r->next;
last = r->back;
if (p == tr->start->back) last = p->back;
}
done = (p->tip) || (p == q);
n = NINT(scale*(q->xcoord - p->xcoord));
if ((n < 3) && (! q->tip)) n = 3;
n -= extra;
extra = 0;
if ((q->ycoord == i) && (! done)) {
if (p->ycoord != q->ycoord) putchar('+');
else putchar('-');
if (! q->tip) {
k = q->number - tr->mxtips;
l = n - 3;
for (j = k; j < 100; j *= 10) l++;
for (j = 1; j <= l; j++) putchar('-');
printf("%d", k);
extra = 1;
}
else for (j = 1; j <= n-1; j++) putchar('-');
}
else if (! p->tip) {
if ((last->ycoord > i) && (first->ycoord < i) && (i != p->ycoord)) {
putchar('!');
for (j = 1; j <= n-1; j++) putchar(' ');
}
else for (j = 1; j <= n; j++) putchar(' ');
}
else
for (j = 1; j <= n; j++) putchar(' ');
p = q;
} while (! done);
if ((p->ycoord == i) && p->tip) {
printf(" %s", p->name);
}
putchar('\n');
} /* drawline */
void printTree (tr, adef)
tree *tr;
analdef *adef;
/* prints out diagram of the tree */
{ /* printTree */
drawdata tipdata;
double scale;
int i, imax;
if (adef->trprint) {
putchar('\n');
tipdata.tipy = 1;
tipdata.tipmax = 0.0;
coordinates(tr, tr->start->back, (double) 0.0, & tipdata);
scale = 1.0 / tipdata.tipmax;
imax = tipdata.tipy - down;
for (i = 1; i <= imax; i++) drawline(tr, i, scale);
printf("\nRemember: ");
if (adef->root) printf("(although rooted by outgroup) ");
printf("this is an unrooted tree!\n\n");
}
} /* printTree */
double sigma (tr, p, sumlrptr)
tree *tr;
nodeptr p;
double *sumlrptr;
/* compute standard deviation */
{ /* sigma */
double slope, sum, sumlr, z, zv, zz, lz,
rat, suma, sumb, sumc, d2, d, li, temp, abzz, bczv, t3,
fx1a, fx1c, fx1g, fx1t, fx1r, fx1y, fx2r, fx2y, w;
double *rptr;
xtype *x1a, *x1c, *x1g, *x1t, *x2a, *x2c, *x2g, *x2t;
nodeptr q;
int i, *wptr;
q = p->back;
while ((! p->x) || (! q->x)) {
if (! (p->x)) if (! newview(tr, p)) return -1.0;
if (! (q->x)) if (! newview(tr, q)) return -1.0;
}
x1a = &(p->x->a[0]);
x1c = &(p->x->c[0]);
x1g = &(p->x->g[0]);
x1t = &(p->x->t[0]);
x2a = &(q->x->a[0]);
x2c = &(q->x->c[0]);
x2g = &(q->x->g[0]);
x2t = &(q->x->t[0]);
z = p->z;
if (z < zmin) z = zmin;
lz = log(z);
wptr = &(tr->cdta->aliaswgt[0]);
rptr = &(tr->cdta->patrat[0]);
sum = sumlr = slope = 0.0;
# ifdef Vectorize
# pragma IVDEP
# endif
for (i = 0; i < tr->cdta->endsite; i++) {
rat = *rptr++;
zz = exp(rat * lz);
zv = exp(rat * tr->rdta->xv * lz);
fx1a = tr->rdta->freqa * *x1a++;
fx1g = tr->rdta->freqg * *x1g++;
fx1c = tr->rdta->freqc * *x1c++;
fx1t = tr->rdta->freqt * *x1t++;
fx1r = fx1a + fx1g;
fx1y = fx1c + fx1t;
suma = fx1a * *x2a + fx1c * *x2c + fx1g * *x2g + fx1t * *x2t;
fx2r = tr->rdta->freqa * *x2a++ + tr->rdta->freqg * *x2g++;
fx2y = tr->rdta->freqc * *x2c++ + tr->rdta->freqt * *x2t++;
sumc = (fx1r + fx1y) * (fx2r + fx2y);
sumb = fx1r * fx2r * tr->rdta->invfreqr + fx1y * fx2y * tr->rdta->invfreqy;
abzz = zz * (suma - sumb);
bczv = zv * (sumb - sumc);
li = sumc + abzz + bczv;
t3 = tr->rdta->xv * bczv;
d = abzz + t3;
d2 = rat * (abzz*(rat-1.0) + t3*(rat * tr->rdta->xv - 1.0));
temp = rat * d / li;
w = *wptr++;
slope += w * temp;
sum += w * (temp * temp - d2/li);
sumlr += w * log(li / (suma + 1.0E-300));
}
*sumlrptr = sumlr;
return (sum > 1.0E-300) ? z*(-slope + sqrt(slope*slope + 3.841*sum))/sum
: 1.0;
} /* sigma */
void describe (tr, p)
tree *tr;
nodeptr p;
/* print out information for one branch */
{ /* describe */
double z, s, sumlr;
nodeptr q;
char *nameptr;
int k, ch;
q = p->back;
printf("%4d ", q->number - tr->mxtips);
if (p->tip) {
nameptr = p->name;
k = nmlngth;
while (ch = *nameptr++) {putchar(ch); k--;}
while (--k >= 0) putchar(' ');
}
else {
printf("%4d", p->number - tr->mxtips);
for (k = 4; k < nmlngth; k++) putchar(' ');
}
z = q->z;
if (z <= zmin) printf(" infinity");
else printf("%15.5f", -log(z) * tr->rdta->fracchange);
s = sigma(tr, q, & sumlr);
printf(" (");
if (z + s >= zmax) printf(" zero");
else printf("%9.5f", (double) -log(z + s) * tr->rdta->fracchange);
putchar(',');
if (z - s <= zmin) printf(" infinity");
else printf("%12.5f", (double) -log(z - s) * tr->rdta->fracchange);
putchar(')');
if (sumlr > 2.995 ) printf(" **");
else if (sumlr > 1.9205) printf(" *");
putchar('\n');
if (! p->tip) {
describe(tr, p->next->back);
describe(tr, p->next->next->back);
}
} /* describe */
void summarize (tr)
tree *tr;
/* print out branch length information and node numbers */
{ /* summarize */
printf("Ln Likelihood =%14.5f\n", tr->likelihood);
putchar('\n');
printf(" Between And Length");
printf(" Approx. Confidence Limits\n");
printf(" ------- --- ------");
printf(" ------- ---------- ------\n");
describe(tr, tr->start->back->next->back);
describe(tr, tr->start->back->next->next->back);
describe(tr, tr->start);
putchar('\n');
printf(" * = significantly positive, P < 0.05\n");
printf(" ** = significantly positive, P < 0.01\n\n\n");
} /* summarize */
/*=========== This is a problem if tr->start->back is a tip! ===========*/
/* All routines should be contrived so that tr->start->back is not a tip */
char *treeString (treestr, tr, p, form)
char *treestr;
tree *tr;
nodeptr p;
int form;
/* write string with representation of tree */
/* form == 1 -> Newick tree */
/* form == 2 -> Prolog fact */
/* form == 3 -> PHYLIP tree */
{ /* treeString */
double x, z;
char *nameptr;
int c;
if (p == tr->start->back) {
if (form != treePHYLIP) {
if (form == treeProlog) {
(void) sprintf(treestr, "phylip_tree(");
while (*treestr) treestr++; /* move pointer to null */
}
(void) sprintf(treestr, "[&&%s: version = '%s'",
programName, programVersion);
while (*treestr) treestr++;
(void) sprintf(treestr, ", %s = %15.13g",
likelihood_key, tr->likelihood);
while (*treestr) treestr++;
(void) sprintf(treestr, ", %s = %d", ntaxa_key, tr->ntips);
while (*treestr) treestr++;
(void) sprintf(treestr,", %s = %d", opt_level_key, tr->opt_level);
while (*treestr) treestr++;
(void) sprintf(treestr, ", %s = %d", smoothed_key, tr->smoothed);
while (*treestr) treestr++;
(void) sprintf(treestr, "]%s", form == treeProlog ? ", " : " ");
while (*treestr) treestr++;
}
}
if (p->tip) {
if (form != treePHYLIP) *treestr++ = '\'';
nameptr = p->name;
while (c = *nameptr++) {
if (form != treePHYLIP) {if (c == '\'') *treestr++ = '\'';}
else if (c == ' ') {c = '_';}
*treestr++ = c;
}
if (form != treePHYLIP) *treestr++ = '\'';
}
else {
*treestr++ = '(';
treestr = treeString(treestr, tr, p->next->back, form);
*treestr++ = ',';
treestr = treeString(treestr, tr, p->next->next->back, form);
if (p == tr->start->back) {
*treestr++ = ',';
treestr = treeString(treestr, tr, p->back, form);
}
*treestr++ = ')';
}
if (p == tr->start->back) {
(void) sprintf(treestr, ":0.0%s\n", (form != treeProlog) ? ";" : ").");
}
else {
z = p->z;
if (z < zmin) z = zmin;
x = -log(z) * tr->rdta->fracchange;
(void) sprintf(treestr, ": %8.6f", x); /* prolog needs the space */
}
while (*treestr) treestr++; /* move pointer up to null termination */
return treestr;
} /* treeString */
void treeOut (treefile, tr, form)
FILE *treefile;
tree *tr;
int form;
/* write out file with representation of final tree */
{ /* treeOut */
int c;
char *cptr, *treestr;
treestr = (char *) Malloc((tr->ntips * (nmlngth+32)) + 256);
if (! treestr) {
fprintf(stderr, "treeOut: Malloc failure\n");
exit(1);
}
(void) treeString(treestr, tr, tr->start->back, form);
cptr = treestr;
while (c = *cptr++) putc(c, treefile);
Free(treestr);
} /* treeOut */
/*=======================================================================*/
/* Read a tree from a file */
/*=======================================================================*/
/* 1.0.A Processing of quotation marks in comment removed
*/
int treeFinishCom (fp, strp)
FILE *fp;
char **strp;
{ /* treeFinishCom */
int ch;
while ((ch = getc(fp)) != EOF && ch != ']') {
if (strp != NULL) *(*strp)++ = ch; /* save character */
if (ch == '[') { /* nested comment; find its end */
if ((ch = treeFinishCom(fp, strp)) == EOF) break;
if (strp != NULL) *(*strp)++ = ch; /* save closing ] */
}
}
if (strp != NULL) **strp = '\0'; /* terminate string */
return ch;
} /* treeFinishCom */
int treeGetCh (fp)
FILE *fp; /* get next nonblank, noncomment character */
{ /* treeGetCh */
int ch;
while ((ch = getc(fp)) != EOF) {
if (whitechar(ch)) ;
else if (ch == '[') { /* comment; find its end */
if ((ch = treeFinishCom(fp, (char **) NULL)) == EOF) break;
}
else break;
}
return ch;
} /* treeGetCh */
boolean treeLabelEnd (ch)
int ch;
{ /* treeLabelEnd */
switch (ch) {
case EOF: case '\0': case '\t': case '\n': case ' ':
case ':': case ',': case '(': case ')': case '[':
case ';':
return TRUE;
default:
break;
}
return FALSE;
} /* treeLabelEnd */
boolean treeGetLabel (fp, lblPtr, maxlen)
FILE *fp;
char *lblPtr;
int maxlen;
{ /* treeGetLabel */
int ch;
boolean done, quoted, lblfound;
if (--maxlen < 0) lblPtr = (char *) NULL; /* reserves space for '\0' */
else if (lblPtr == NULL) maxlen = 0;
ch = getc(fp);
done = treeLabelEnd(ch);
lblfound = ! done;
quoted = (ch == '\'');
if (quoted && ! done) {ch = getc(fp); done = (ch == EOF);}
while (! done) {
if (quoted) {
if (ch == '\'') {ch = getc(fp); if (ch != '\'') break;}
}
else if (treeLabelEnd(ch)) break;
else if (ch == '_') ch = ' '; /* unquoted _ goes to space */
if (--maxlen >= 0) *lblPtr++ = ch;
ch = getc(fp);
if (ch == EOF) break;
}
if (ch != EOF) (void) ungetc(ch, fp);
if (lblPtr != NULL) *lblPtr = '\0';
return lblfound;
} /* treeGetLabel */
boolean treeFlushLabel (fp)
FILE *fp;
{ /* treeFlushLabel */
return treeGetLabel(fp, (char *) NULL, (int) 0);
} /* treeFlushLabel */
int treeFindTipByLabel (str, tr)
char *str; /* label string pointer */
tree *tr;
{ /* treeFindTipByLabel */
nodeptr q;
char *nameptr;
int ch, i, n;
boolean found;
for (n = 1; n <= tr->mxtips; n++) {
q = tr->nodep[n];
if (! (q->back)) { /* Only consider unused tips */
i = 0;
nameptr = q->name;
while ((found = (str[i++] == (ch = *nameptr++))) && ch) ;
if (found) return n;
}
}
printf("ERROR: Cannot find tree species: %s\n", str);
return 0;
} /* treeFindTipByLabel */
int treeFindTipName (fp, tr)
FILE *fp;
tree *tr;
{ /* treeFindTipName */
char *nameptr, str[nmlngth+2];
int n;
if (tr->prelabeled) {
if (treeGetLabel(fp, str, nmlngth+2))
n = treeFindTipByLabel(str, tr);
else
n = 0;
}
else if (tr->ntips < tr->mxtips) {
n = tr->ntips + 1;
nameptr = tr->nodep[n]->name;
if (! treeGetLabel(fp, nameptr, nmlngth+1)) n = 0;
}
else {
n = 0;
}
return n;
} /* treeFindTipName */
void treeEchoContext (fp1, fp2, n)
FILE *fp1, *fp2;
int n;
{ /* treeEchoContext */
int ch;
boolean waswhite;
waswhite = TRUE;
while (n > 0 && ((ch = getc(fp1)) != EOF)) {
if (whitechar(ch)) {
ch = waswhite ? '\0' : ' ';
waswhite = TRUE;
}
else {
waswhite = FALSE;
}
if (ch > '\0') {putc(ch, fp2); n--;}
}
} /* treeEchoContext */
boolean treeProcessLength (fp, dptr)
FILE *fp;
double *dptr;
{ /* treeProcessLength */
int ch;
if ((ch = treeGetCh(fp)) == EOF) return FALSE; /* Skip comments */
(void) ungetc(ch, fp);
if (fscanf(fp, "%lf", dptr) != 1) {
printf("ERROR: treeProcessLength: Problem reading branch length\n");
treeEchoContext(fp, stdout, 40);
printf("\n");
return FALSE;
}
return TRUE;
} /* treeProcessLength */
boolean treeFlushLen (fp)
FILE *fp;
{ /* treeFlushLen */
double dummy;
int ch;
if ((ch = treeGetCh(fp)) == ':') return treeProcessLength(fp, & dummy);
if (ch != EOF) (void) ungetc(ch, fp);
return TRUE;
} /* treeFlushLen */
boolean treeNeedCh (fp, c1, where)
FILE *fp;
int c1;
char *where;
{ /* treeNeedCh */
int c2;
if ((c2 = treeGetCh(fp)) == c1) return TRUE;
printf("ERROR: Expecting '%c' %s tree; found:", c1, where);
if (c2 == EOF) {
printf("End-of-File");
}
else {
ungetc(c2, fp);
treeEchoContext(fp, stdout, 40);
}
putchar('\n');
return FALSE;
} /* treeNeedCh */
boolean addElementLen (fp, tr, p)
FILE *fp;
tree *tr;
nodeptr p;
{ /* addElementLen */
double z, branch;
nodeptr q;
int n, ch;
if ((ch = treeGetCh(fp)) == '(') { /* A new internal node */
n = (tr->nextnode)++;
if (n > 2*(tr->mxtips) - 2) {
if (tr->rooted || n > 2*(tr->mxtips) - 1) {
printf("ERROR: Too many internal nodes. Is tree rooted?\n");
printf(" Deepest splitting should be a trifurcation.\n");
return FALSE;
}
else {
tr->rooted = TRUE;
}
}
q = tr->nodep[n];
if (! addElementLen(fp, tr, q->next)) return FALSE;
if (! treeNeedCh(fp, ',', "in")) return FALSE;
if (! addElementLen(fp, tr, q->next->next)) return FALSE;
if (! treeNeedCh(fp, ')', "in")) return FALSE;
(void) treeFlushLabel(fp);
}
else { /* A new tip */
ungetc(ch, fp);
if ((n = treeFindTipName(fp, tr)) <= 0) return FALSE;
q = tr->nodep[n];
if (tr->start->number > n) tr->start = q;
(tr->ntips)++;
} /* End of tip processing */
if (tr->userlen) {
if (! treeNeedCh(fp, ':', "in")) return FALSE;
if (! treeProcessLength(fp, & branch)) return FALSE;
z = exp(-branch / tr->rdta->fracchange);
if (z > zmax) z = zmax;
hookup(p, q, z);
}
else {
if (! treeFlushLen(fp)) return FALSE;
hookup(p, q, defaultz);
}
return TRUE;
} /* addElementLen */
int saveTreeCom (comstrp)
char **comstrp;
{ /* saveTreeCom */
int ch;
boolean inquote;
inquote = FALSE;
while ((ch = getc(INFILE)) != EOF && (inquote || ch != ']')) {
*(*comstrp)++ = ch; /* save character */
if (ch == '[' && ! inquote) { /* comment; find its end */
if ((ch = saveTreeCom(comstrp)) == EOF) break;
*(*comstrp)++ = ch; /* add ] */
}
else if (ch == '\'') inquote = ! inquote; /* start or end of quote */
}
return ch;
} /* saveTreeCom */
boolean processTreeCom (fp, tr)
FILE *fp;
tree *tr;
{ /* processTreeCom */
int text_started, functor_read, com_open;
/* Accept prefatory "phylip_tree(" or "pseudoNewick(" */
functor_read = text_started = 0;
(void) fscanf(fp, " p%nhylip_tree(%n", & text_started, & functor_read);
if (text_started && ! functor_read) {
(void) fscanf(fp, "seudoNewick(%n", & functor_read);
if (! functor_read) {
printf("Start of tree 'p...' not understood.\n");
return FALSE;
}
}
com_open = 0;
(void) fscanf(fp, " [%n", & com_open);
if (com_open) { /* comment; read it */
char com[1024], *com_end;
com_end = com;
if (treeFinishCom(fp, & com_end) == EOF) { /* omits enclosing []s */
printf("Missing end of tree comment\n");
return FALSE;
}
(void) readKeyValue(com, likelihood_key, "%lg",
(void *) &(tr->likelihood));
(void) readKeyValue(com, opt_level_key, "%d",
(void *) &(tr->opt_level));
(void) readKeyValue(com, smoothed_key, "%d",
(void *) &(tr->smoothed));
if (functor_read) (void) fscanf(fp, " ,"); /* remove trailing comma */
}
return (functor_read > 0);
} /* processTreeCom */
nodeptr uprootTree (tr, p)
tree *tr;
nodeptr p;
{ /* uprootTree */
nodeptr q, r, s, start;
int n;
if (p->tip || p->back) {
printf("ERROR: Unable to uproot tree.\n");
printf(" Inappropriate node marked for removal.\n");
return (nodeptr) NULL;
}
n = --(tr->nextnode); /* last internal node added */
if (n != tr->mxtips + tr->ntips - 1) {
printf("ERROR: Unable to uproot tree. Inconsistent\n");
printf(" number of tips and nodes for rooted tree.\n");
return (nodeptr) NULL;
}
q = p->next->back; /* remove p from tree */
r = p->next->next->back;
hookup(q, r, tr->userlen ? (q->z * r->z) : defaultz);
start = (r->tip || (! q->tip)) ? r : r->next->next->back;
if (tr->ntips > 2 && p->number != n) {
q = tr->nodep[n]; /* transfer last node's conections to p */
r = q->next;
s = q->next->next;
hookup(p, q->back, q->z); /* move connections to p */
hookup(p->next, r->back, r->z);
hookup(p->next->next, s->back, s->z);
if (start->number == q->number) start = start->back->back;
q->back = r->back = s->back = (nodeptr) NULL;
}
else {
p->back = p->next->back = p->next->next->back = (nodeptr) NULL;
}
tr->rooted = FALSE;
return start;
} /* uprootTree */
boolean treeReadLen (fp, tr)
FILE *fp;
tree *tr;
{ /* treeReadLen */
nodeptr p;
int i, ch;
boolean is_fact;
for (i = 1; i <= tr->mxtips; i++) tr->nodep[i]->back = (node *) NULL;
tr->start = tr->nodep[tr->mxtips];
tr->ntips = 0;
tr->nextnode = tr->mxtips + 1;
tr->opt_level = 0;
tr->log_f_valid = 0;
tr->smoothed = FALSE;
tr->rooted = FALSE;
is_fact = processTreeCom(fp, tr);
p = tr->nodep[(tr->nextnode)++];
if (! treeNeedCh(fp, '(', "at start of")) return FALSE;
if (! addElementLen(fp, tr, p)) return FALSE;
if (! treeNeedCh(fp, ',', "in")) return FALSE;
if (! addElementLen(fp, tr, p->next)) return FALSE;
if (! tr->rooted) {
if ((ch = treeGetCh(fp)) == ',') { /* An unrooted format */
if (! addElementLen(fp, tr, p->next->next)) return FALSE;
}
else { /* A rooted format */
tr->rooted = TRUE;
if (ch != EOF) (void) ungetc(ch, fp);
}
}
else {
p->next->next->back = (nodeptr) NULL;
}
if (! treeNeedCh(fp, ')', "in")) return FALSE;
(void) treeFlushLabel(fp);
if (! treeFlushLen(fp)) return FALSE;
if (is_fact) {
if (! treeNeedCh(fp, ')', "at end of")) return FALSE;
if (! treeNeedCh(fp, '.', "at end of")) return FALSE;
}
else {
if (! treeNeedCh(fp, ';', "at end of")) return FALSE;
}
if (tr->rooted) {
p->next->next->back = (nodeptr) NULL;
tr->start = uprootTree(tr, p->next->next);
if (! tr->start) return FALSE;
}
else {
tr->start = p->next->next->back; /* This is start used by treeString */
}
return (initrav(tr, tr->start) && initrav(tr, tr->start->back));
} /* treeReadLen */
/*=======================================================================*/
/* Read a tree from a string */
/*=======================================================================*/
#if Master || Slave
int str_treeFinishCom (treestrp, strp)
char **treestrp; /* tree string pointer */
char **strp; /* comment string pointer */
{ /* str_treeFinishCom */
int ch;
while ((ch = *(*treestrp)++) != NULL && ch != ']') {
if (strp != NULL) *(*strp)++ = ch; /* save character */
if (ch == '[') { /* nested comment; find its end */
if ((ch = str_treeFinishCom(treestrp)) == NULL) break;
if (strp != NULL) *(*strp)++ = ch; /* save closing ] */
}
}
if (strp != NULL) **strp = '\0'; /* terminate string */
return ch;
} /* str_treeFinishCom */
int str_treeGetCh (treestrp)
char **treestrp; /* tree string pointer */
/* get next nonblank, noncomment character */
{ /* str_treeGetCh */
int ch;
while ((ch = *(*treestrp)++) != NULL) {
if (whitechar(ch)) ;
else if (ch == '[') { /* comment; find its end */
if ((ch = str_treeFinishCom(treestrp, (char *) NULL)) == NULL) break;
}
else break;
}
return ch;
} /* str_treeGetCh */
boolean str_treeGetLabel (treestrp, lblPtr, maxlen)
char **treestrp; /* tree string pointer */
char *lblPtr;
int maxlen;
{ /* str_treeGetLabel */
int ch;
boolean done, quoted, lblfound;
if (--maxlen < 0) lblPtr = (char *) NULL; /* reserves space for '\0' */
else if (lblPtr == NULL) maxlen = 0;
ch = *(*treestrp)++;
done = treeLabelEnd(ch);
lblfound = ! done;
quoted = (ch == '\'');
if (quoted && ! done) {ch = *(*treestrp)++; done = (ch == '\0');}
while (! done) {
if (quoted) {
if (ch == '\'') {ch = *(*treestrp)++; if (ch != '\'') break;}
}
else if (treeLabelEnd(ch)) break;
else if (ch == '_') ch = ' '; /* unquoted _ goes to space */
if (--maxlen >= 0) *lblPtr++ = ch;
ch = *(*treestrp)++;
if (ch == '\0') break;
}
(*treestrp)--;
if (lblPtr != NULL) *lblPtr = '\0';
return lblfound;
} /* str_treeGetLabel */
boolean str_treeFlushLabel (treestrp)
char **treestrp; /* tree string pointer */
{ /* str_treeFlushLabel */
return str_treeGetLabel(treestrp, (char *) NULL, (int) 0);
} /* str_treeFlushLabel */
int str_treeFindTipName (treestrp, tr)
char **treestrp; /* tree string pointer */
tree *tr;
{ /* str_treeFindTipName */
nodeptr q;
char *nameptr, str[nmlngth+2];
int ch, i, n;
if (tr->prelabeled) {
if (str_treeGetLabel(treestrp, str, nmlngth+2))
n = treeFindTipByLabel(str, tr);
else
n = 0;
}
else if (tr->ntips < tr->mxtips) {
n = tr->ntips + 1;
nameptr = tr->nodep[n]->name;
if (! str_treeGetLabel(treestrp, nameptr, nmlngth+1)) n = 0;
}
else {
n = 0;
}
return n;
} /* str_treeFindTipName */
boolean str_treeProcessLength (treestrp, dptr)
char **treestrp; /* tree string ponter */
double *dptr;
{ /* str_treeProcessLength */
int used;
if(! str_treeGetCh(treestrp)) return FALSE; /* Skip comments */
(*treestrp)--;
if (sscanf(*treestrp, "%lf%n", dptr, & used) != 1) {
printf("ERROR: str_treeProcessLength: Problem reading branch length\n");
printf("%40s\n", *treestrp);
*dptr = 0.0;
return FALSE;
}
else {
*treestrp += used;
}
return TRUE;
} /* str_treeProcessLength */
boolean str_treeFlushLen (treestrp)
char **treestrp; /* tree string ponter */
{ /* str_treeFlushLen */
int ch;
if ((ch = str_treeGetCh(treestrp)) == ':')
return str_treeProcessLength(treestrp, (double *) NULL);
else {
(*treestrp)--;
return TRUE;
}
} /* str_treeFlushLen */
boolean str_treeNeedCh (treestrp, c1, where)
char **treestrp; /* tree string pointer */
int c1;
char *where;
{ /* str_treeNeedCh */
int c2, i;
if ((c2 = str_treeGetCh(treestrp)) == c1) return TRUE;
printf("ERROR: Missing '%c' %s tree; ", c1, where);
if (c2 == '\0')
printf("end-of-string");
else {
putchar('"');
for (i = 24; i-- && (c2 != '\0'); c2 = *(*treestrp)++) putchar(c2);
putchar('"');
}
printf(" found instead\n");
return FALSE;
} /* str_treeNeedCh */
boolean str_addElementLen (treestrp, tr, p)
char **treestrp; /* tree string pointer */
tree *tr;
nodeptr p;
{ /* str_addElementLen */
double z, branch;
nodeptr q;
int n, ch;
if ((ch = str_treeGetCh(treestrp)) == '(') { /* A new internal node */
n = (tr->nextnode)++;
if (n > 2*(tr->mxtips) - 2) {
if (tr->rooted || n > 2*(tr->mxtips) - 1) {
printf("ERROR: too many internal nodes. Is tree rooted?\n");
printf("Deepest splitting should be a trifurcation.\n");
return FALSE;
}
else {
tr->rooted = TRUE;
}
}
q = tr->nodep[n];
if (! str_addElementLen(treestrp, tr, q->next)) return FALSE;
if (! str_treeNeedCh(treestrp, ',', "in")) return FALSE;
if (! str_addElementLen(treestrp, tr, q->next->next)) return FALSE;
if (! str_treeNeedCh(treestrp, ')', "in")) return FALSE;
if (! str_treeFlushLabel(treestrp)) return FALSE;
}
else { /* A new tip */
n = str_treeFindTipName(treestrp, tr, ch);
if (n <= 0) return FALSE;
q = tr->nodep[n];
if (tr->start->number > n) tr->start = q;
(tr->ntips)++;
} /* End of tip processing */
/* Master and Slave always use lengths */
if (! str_treeNeedCh(treestrp, ':', "in")) return FALSE;
if (! str_treeProcessLength(treestrp, & branch)) return FALSE;
z = exp(-branch / tr->rdta->fracchange);
if (z > zmax) z = zmax;
hookup(p, q, z);
return TRUE;
} /* str_addElementLen */
boolean str_processTreeCom(tr, treestrp)
tree *tr;
char **treestrp;
{ /* str_processTreeCom */
char *com, *com_end;
int text_started, functor_read, com_open;
com = *treestrp;
functor_read = text_started = 0;
sscanf(com, " p%nhylip_tree(%n", & text_started, & functor_read);
if (functor_read) {
com += functor_read;
}
else if (text_started) {
com += text_started;
sscanf(com, "seudoNewick(%n", & functor_read);
if (! functor_read) {
printf("Start of tree 'p...' not understood.\n");
return FALSE;
}
else {
com += functor_read;
}
}
com_open = 0;
sscanf(com, " [%n", & com_open);
com += com_open;
if (com_open) { /* comment; read it */
if (!(com_end = strchr(com, ']'))) {
printf("Missing end of tree comment.\n");
return FALSE;
}
*com_end = 0;
(void) readKeyValue(com, likelihood_key, "%lg",
(void *) &(tr->likelihood));
(void) readKeyValue(com, opt_level_key, "%d",
(void *) &(tr->opt_level));
(void) readKeyValue(com, smoothed_key, "%d",
(void *) &(tr->smoothed));
*com_end = ']';
com_end++;
if (functor_read) { /* remove trailing comma */
text_started = 0;
sscanf(com_end, " ,%n", & text_started);
com_end += text_started;
}
*treestrp = com_end;
}
return (functor_read > 0);
} /* str_processTreeCom */
boolean str_treeReadLen (treestr, tr)
char *treestr;
tree *tr;
/* read string with representation of tree */
{ /* str_treeReadLen */
nodeptr p;
int i;
boolean is_fact, found;
for (i = 1; i <= tr->mxtips; i++) tr->nodep[i]->back = (node *) NULL;
tr->start = tr->nodep[tr->mxtips];
tr->ntips = 0;
tr->nextnode = tr->mxtips + 1;
tr->opt_level = 0;
tr->log_f_valid = 0;
tr->smoothed = Master;
tr->rooted = FALSE;
is_fact = str_processTreeCom(tr, & treestr);
p = tr->nodep[(tr->nextnode)++];
if (! str_treeNeedCh(& treestr, '(', "at start of")) return FALSE;
if (! str_addElementLen(& treestr, tr, p)) return FALSE;
if (! str_treeNeedCh(& treestr, ',', "in")) return FALSE;
if (! str_addElementLen(& treestr, tr, p->next)) return FALSE;
if (! tr->rooted) {
if (str_treeGetCh(& treestr) == ',') { /* An unrooted format */
if (! str_addElementLen(& treestr, tr, p->next->next)) return FALSE;
}
else { /* A rooted format */
p->next->next->back = (nodeptr) NULL;
tr->rooted = TRUE;
treestr--;
}
}
if (! str_treeNeedCh(& treestr, ')', "in")) return FALSE;
if (! str_treeFlushLabel(& treestr)) return FALSE;
if (! str_treeFlushLen(& treestr)) return FALSE;
if (is_fact) {
if (! str_treeNeedCh(& treestr, ')', "at end of")) return FALSE;
if (! str_treeNeedCh(& treestr, '.', "at end of")) return FALSE;
}
else {
if (! str_treeNeedCh(& treestr, ';', "at end of")) return FALSE;
}
if (tr->rooted) if (! uprootTree(tr, p->next->next)) return FALSE;
tr->start = p->next->next->back; /* This is start used by treeString */
return (initrav(tr, tr->start) && initrav(tr, tr->start->back));
} /* str_treeReadLen */
#endif
boolean treeEvaluate (tr, bt) /* Evaluate a user tree */
tree *tr;
bestlist *bt;
{ /* treeEvaluate */
if (Slave || ! tr->userlen) {
if (! smoothTree(tr, 4 * smoothings)) return FALSE;
}
if (evaluate(tr, tr->start) == badEval) return FALSE;
# if ! Slave
(void) saveBestTree(bt, tr);
# endif
return TRUE;
} /* treeEvaluate */
#if Master || Slave
FILE *freopen_pid (filenm, mode, stream)
char *filenm, *mode;
FILE *stream;
{ /* freopen_pid */
char scr[512];
(void) sprintf(scr, "%s.%d", filenm, getpid());
return freopen(scr, mode, stream);
} /* freopen_pid */
#endif
boolean showBestTrees (bt, tr, adef, treefile)
bestlist *bt;
tree *tr;
analdef *adef;
FILE *treefile;
{ /* showBestTrees */
int rank;
for (rank = 1; rank <= bt->nvalid; rank++) {
if (rank > 1) {
if (rank != recallBestTree(bt, rank, tr)) break;
}
if (evaluate(tr, tr->start) == badEval) return FALSE;
if (tr->outgrnode->back) tr->start = tr->outgrnode;
printTree(tr, adef);
summarize(tr);
if (treefile) treeOut(treefile, tr, adef->trout);
}
return TRUE;
} /* showBestTrees */
boolean cmpBestTrees (bt, tr)
bestlist *bt;
tree *tr;
{ /* cmpBestTrees */
double sum, sum2, sd, temp, wtemp, bestscore;
double *log_f0, *log_f0_ptr; /* Save a copy of best log_f */
double *log_f_ptr;
int i, j, num, besttips;
num = bt->nvalid;
if ((num <= 1) || (tr->cdta->wgtsum <= 1)) return TRUE;
if (! (log_f0 = (double *) Malloc(sizeof(double) * tr->cdta->endsite))) {
printf("ERROR: cmpBestTrees unable to obtain space for log_f0\n");
return FALSE;
}
printf("Tree Ln L Diff Ln L Its S.D.");
printf(" Significantly worse?\n\n");
for (i = 1; i <= num; i++) {
if (i != recallBestTree(bt, i, tr)) break;
if (! (tr->log_f_valid)) {
if (evaluate(tr, tr->start) == badEval) return FALSE;
}
printf("%3d%14.5f", i, tr->likelihood);
if (i == 1) {
printf(" <------ best\n");
besttips = tr->ntips;
bestscore = tr->likelihood;
log_f0_ptr = log_f0;
log_f_ptr = tr->log_f;
for (j = 0; j < tr->cdta->endsite; j++) *log_f0_ptr++ = *log_f_ptr++;
}
else if (tr->ntips != besttips)
printf(" (different number of species)\n");
else {
sum = sum2 = 0.0;
log_f0_ptr = log_f0;
log_f_ptr = tr->log_f;
for (j = 0; j < tr->cdta->endsite; j++) {
temp = *log_f0_ptr++ - *log_f_ptr++;
wtemp = tr->cdta->aliaswgt[j] * temp;
sum += wtemp;
sum2 += wtemp * temp;
}
sd = sqrt( tr->cdta->wgtsum * (sum2 - sum*sum / tr->cdta->wgtsum)
/ (tr->cdta->wgtsum - 1) );
printf("%14.5f%14.4f", tr->likelihood - bestscore, sd);
printf(" %s\n", (sum > 1.95996 * sd) ? "Yes" : " No");
}
}
Free(log_f0);
printf("\n\n");
return TRUE;
} /* cmpBestTrees */
boolean makeUserTree (tr, bt, adef)
tree *tr;
bestlist *bt;
analdef *adef;
{ /* makeUserTree */
char filename[128];
FILE *treefile;
int nusertrees, which;
nusertrees = adef->numutrees;
printf("User-defined %s:\n\n", (nusertrees == 1) ? "tree" : "trees");
treefile = adef->trout ? fopen_pid("treefile", "w", filename) : (FILE *) NULL;
for (which = 1; which <= nusertrees; which++) {
if (! treeReadLen(INFILE, tr)) return FALSE;
if (! treeEvaluate(tr, bt)) return FALSE;
if (tr->global <= 0) {
if (tr->outgrnode->back) tr->start = tr->outgrnode;
printTree(tr, adef);
summarize(tr);
if (treefile) treeOut(treefile, tr, adef->trout);
}
else {
printf("%6d: Ln Likelihood =%14.5f\n", which, tr->likelihood);
}
}
if (tr->global > 0) {
putchar('\n');
if (! recallBestTree(bt, 1, tr)) return FALSE;
printf(" Ln Likelihood =%14.5f\n", tr->likelihood);
if (! optimize(tr, tr->global, bt)) return FALSE;
if (tr->outgrnode->back) tr->start = tr->outgrnode;
printTree(tr, adef);
summarize(tr);
if (treefile) treeOut(treefile, tr, adef->trout);
}
if (treefile) {
(void) fclose(treefile);
printf("Tree also written to %s\n", filename);
}
putchar('\n');
(void) cmpBestTrees(bt, tr);
return TRUE;
} /* makeUserTree */
#if Slave
boolean slaveTreeEvaluate (tr, bt)
tree *tr;
bestlist *bt;
{ /* slaveTreeEvaluate */
boolean done;
do {
requestForWork();
if (! receiveTree(& comm_master, tr)) return FALSE;
done = tr->likelihood > 0.0;
if (! done) {
if (! treeEvaluate(tr, bt)) return FALSE;
if (! sendTree(& comm_master, tr)) return FALSE;
}
} while (! done);
return TRUE;
} /* slaveTreeEvaluate */
#endif
double randum (seed)
long *seed;
/* random number generator, modified to use 12 bit chunks */
{ /* randum */
long sum, mult0, mult1, seed0, seed1, seed2, newseed0, newseed1, newseed2;
mult0 = 1549;
seed0 = *seed & 4095;
sum = mult0 * seed0;
newseed0 = sum & 4095;
sum >>= 12;
seed1 = (*seed >> 12) & 4095;
mult1 = 406;
sum += mult0 * seed1 + mult1 * seed0;
newseed1 = sum & 4095;
sum >>= 12;
seed2 = (*seed >> 24) & 255;
sum += mult0 * seed2 + mult1 * seed1;
newseed2 = sum & 255;
*seed = newseed2 << 24 | newseed1 << 12 | newseed0;
return 0.00390625 * (newseed2
+ 0.000244140625 * (newseed1
+ 0.000244140625 * newseed0));
} /* randum */
boolean makeDenovoTree (tr, bt, adef)
tree *tr;
bestlist *bt;
analdef *adef;
{ /* makeDenovoTree */
char filename[128];
FILE *treefile;
nodeptr p;
int *enterorder; /* random entry order */
int i, j, k, nextsp, newsp, maxtrav, tested;
double randum();
enterorder = (int *) Malloc(sizeof(int) * (tr->mxtips + 1));
if (! enterorder) {
fprintf(stderr, "makeDenovoTree: Malloc failure for enterorder\n");
return 0;
}
if (adef->restart) {
printf("Restarting from tree with the following sequences:\n");
tr->userlen = TRUE;
if (! treeReadLen(INFILE, tr)) return FALSE;
if (! smoothTree(tr, smoothings)) return FALSE;
if (evaluate(tr, tr->start) == badEval) return FALSE;
if (saveBestTree(bt, tr) < 1) return FALSE;
for (i = 1, j = tr->ntips; i <= tr->mxtips; i++) { /* find loose tips */
if (! tr->nodep[i]->back) {
enterorder[++j] = i;
}
else {
printf(" %s\n", tr->nodep[i]->name);
# if Master
if (i>3) REPORT_ADD_SPECS;
# endif
}
}
putchar('\n');
}
else { /* start from scratch */
tr->ntips = 0;
for (i = 1; i <= tr->mxtips; i++) enterorder[i] = i;
}
if (adef->jumble) for (i = tr->ntips + 1; i <= tr->mxtips; i++) {
j = randum(&(adef->jumble))*(tr->mxtips - tr->ntips) + tr->ntips + 1;
k = enterorder[j];
enterorder[j] = enterorder[i];
enterorder[i] = k;
}
bt->numtrees = 1;
if (tr->ntips < tr->mxtips) printf("Adding species:\n");
if (tr->ntips == 0) {
for (i = 1; i <= 3; i++) {
printf(" %s\n", tr->nodep[enterorder[i]]->name);
}
tr->nextnode = tr->mxtips + 1;
if (! buildSimpleTree(tr, enterorder[1], enterorder[2], enterorder[3]))
return FALSE;
}
while (tr->ntips < tr->mxtips || tr->opt_level < tr->global) {
maxtrav = (tr->ntips == tr->mxtips) ? tr->global : tr->partswap;
if (maxtrav > tr->ntips - 3) maxtrav = tr->ntips - 3;
if (tr->opt_level >= maxtrav) {
nextsp = ++(tr->ntips);
newsp = enterorder[nextsp];
p = tr->nodep[newsp];
printf(" %s\n", p->name);
# if Master
if (nextsp % DNAML_STEP_TIME_COUNT == 1) {
REPORT_STEP_TIME;
}
REPORT_ADD_SPECS;
# endif
(void) buildNewTip(tr, p);
resetBestTree(bt);
cacheZ(tr);
tested = addTraverse(tr, p->back, findAnyTip(tr->start)->back,
1, tr->ntips - 2, bt, adef->qadd);
if (tested == badRear) return FALSE;
bt->numtrees += tested;
# if Master
getReturnedTrees(tr, bt, tested);
# endif
printf(" Tested %d alternative trees\n", tested);
(void) recallBestTree(bt, 1, tr);
if (! tr->smoothed) {
if (! smoothTree(tr, smoothings)) return FALSE;
if (evaluate(tr, tr->start) == badEval) return FALSE;
(void) saveBestTree(bt, tr);
}
if (tr->ntips == 4) tr->opt_level = 1; /* All 4 taxon trees done */
maxtrav = (tr->ntips == tr->mxtips) ? tr->global : tr->partswap;
if (maxtrav > tr->ntips - 3) maxtrav = tr->ntips - 3;
}
printf(" Ln Likelihood =%14.5f\n", tr->likelihood);
if (! optimize(tr, maxtrav, bt)) return FALSE;
}
printf("\nExamined %d %s\n", bt->numtrees,
bt->numtrees != 1 ? "trees" : "tree");
treefile = adef->trout ? fopen_pid("treefile", "w", filename) : (FILE *) NULL;
(void) showBestTrees(bt, tr, adef, treefile);
if (treefile) {
(void) fclose(treefile);
printf("Tree also written to %s\n\n", filename);
}
(void) cmpBestTrees(bt, tr);
# if DeleteCheckpointFile
unlink_pid(checkpointname);
# endif
Free(enterorder);
return TRUE;
} /* makeDenovoTree */
/*==========================================================================*/
/* "main" routine */
/*==========================================================================*/
#if defined(MACINTOSH) || defined(MSDOS)
int RealMain(int argc, char** argv)
#else
#if Sequential
main ()
#else
slave ()
#endif
#endif
{ /* DNA Maximum Likelihood */
# if Master
int starttime, inputtime, endtime;
# endif
# if Sequential
/* dgg addition */
long starttime, inputtime, endtime;
double millisecs;
# endif
# if Master || Slave
int my_id, nprocs, type, from, sz;
char *msg;
# endif
analdef *adef;
rawDNA *rdta;
crunchedDNA *cdta;
tree *tr; /* current tree */
bestlist *bt; /* topology of best found tree */
# if Debug
{
char debugfilename[128];
debug = fopen_pid("dnaml_debug", "w", debugfilename);
}
# endif
# if Master
starttime = p4_clock();
nprocs = p4_num_total_slaves();
if ((OUTFILE = freopen_pid("master.out", "w", stdout)) == NULL) {
fprintf(stderr, "Could not open output file\n");
exit(1);
}
/* Receive input file name from host */
type = DNAML_FILE_NAME;
from = DNAML_HOST_ID;
msg = NULL;
p4_recv(& type, & from, & msg, & sz);
if ((INFILE = fopen(msg, "r")) == NULL) {
fprintf(stderr, "master could not open input file %s\n", msg);
exit(1);
}
p4_msg_free(msg);
open_link(& comm_slave);
# endif
# if Sequential
starttime = clock();
#ifdef NoSTDIN
{
char *fname = "infile";
if ((INFILE = fopen(fname, "r")) == NULL)
do {
char filename[512];
filename[0]= 0;
fprintf(stderr, "Could not open input file '%s'\n", fname);
fprintf(stderr, "Enter path-name for data file: ");
scanf( "%s", filename);
fname= filename;
INFILE = fopen(fname, "r");
} while (!INFILE && *fname > ' ');
if (INFILE == NULL) {
fprintf(stderr, "Could not open input file '%s'\n", fname);
exit(1);
}
}
#endif
# endif
# if DNAML_STEP
begin_step_time = starttime;
# endif
# if Slave
my_id = p4_get_my_id();
nprocs = p4_num_total_slaves();
/* Receive input file name from host */
type = DNAML_FILE_NAME;
from = DNAML_HOST_ID;
msg = NULL;
p4_recv(& type, & from, & msg, & sz);
if ((INFILE = fopen(msg, "r")) == NULL) {
fprintf(stderr, "slave could not open input file %s\n",msg);
exit(1);
}
p4_msg_free(msg);
# ifdef P4DEBUG
if ((OUTFILE = freopen_pid("slave.out", "w", stdout)) == NULL) {
fprintf(stderr, "Could not open output file\n");
exit(1);
}
# else
if ((OUTFILE = freopen("/dev/null", "w", stdout)) == NULL) {
fprintf(stderr, "Could not open output file\n");
exit(1);
}
# endif
open_link(& comm_master);
# endif
/* Get data structure memory */
if (! (adef = (analdef *) Malloc(sizeof(analdef)))) {
printf("ERROR: Unable to get memory for analysis definition\n\n");
return 1;
}
if (! (rdta = (rawDNA *) Malloc(sizeof(rawDNA)))) {
printf("ERROR: Unable to get memory for raw DNA\n\n");
return 1;
}
if (! (cdta = (crunchedDNA *) Malloc(sizeof(crunchedDNA)))) {
printf("ERROR: Unable to get memory for crunched DNA\n\n");
return 1;
}
if ((tr = (tree *) Malloc(sizeof(tree))) &&
(bt = (bestlist *) Malloc(sizeof(bestlist)))) ;
else {
printf("ERROR: Unable to get memory for trees\n\n");
return 1;
}
bt->ninit = 0;
if (! getinput(adef, rdta, cdta, tr)) return 1;
# if Master
inputtime = p4_clock();
printf("Input time %d milliseconds\n", inputtime - starttime);
REPORT_STEP_TIME;
# endif
# if Slave
(void) fclose(INFILE);
# endif
# if Sequential
inputtime = clock();
millisecs= TIMETOSECS(inputtime - starttime);
printf("Input time %6.4f seconds\n",millisecs);
# ifdef NoSTDIN
(void) fclose(INFILE);
# endif
# endif
/* The material below would be a loop over jumbles and/or boots */
if (! makeweights(adef, rdta, cdta)) return 1;
if (! makevalues(rdta, cdta)) return 1;
if (adef->empf && ! empiricalfreqs(rdta, cdta)) return 1;
reportfreqs(adef, rdta);
if (! linkdata2tree(rdta, cdta, tr)) return 1;
if (! linkxarray(3, 3, cdta->endsite, & freextip, & usedxtip)) return 1;
if (! setupnodex(tr)) return 1;
# if Slave
if (! slaveTreeEvaluate(tr, bt)) return 1;
# else
if (! initBestTree(bt, adef->nkeep, tr->mxtips, tr->cdta->endsite)) return 1;
if (! adef->usertree) {
if (! makeDenovoTree(tr, bt, adef)) return 1;
}
else {
if (! makeUserTree(tr, bt, adef)) return 1;
}
if (! freeBestTree(bt)) return 1;
# endif
/* Endpoint for jumble and/or boot loop */
# if Master
tr->likelihood = 1.0; /* terminate slaves */
(void) sendTree(& comm_slave, tr);
# endif
freeTree(tr);
# if Master
close_link(& comm_slave);
(void) fclose(INFILE);
REPORT_STEP_TIME;
endtime = p4_clock();
printf("Execution time %d milliseconds\n", endtime - inputtime);
(void) fclose(OUTFILE);
# endif
# if Slave
close_link(& comm_master);
(void) fclose(OUTFILE);
# endif
# if Sequential
/* dgg addition */
endtime = clock();
millisecs= TIMETOSECS(endtime - inputtime);
printf("Execution time %6.4f seconds\n", millisecs);
# endif
# if Debug
(void) fclose(debug);
# endif
# if Master || Slave
p4_send(DNAML_DONE, DNAML_HOST_ID, NULL, 0);
# else
return 0;
# endif
} /* DNA Maximum Likelihood */